Search | arXiv e-print repository

Showing 1–50 of 77 results for author: Xing, H
Searching in archive cs.

1. arXiv:2411.13000 [pdf, other]
Categories: cs.IT (Information Theory); cs.LG (Machine Learning); eess.SP (Signal Processing)
Title: NCAirFL: CSI-Free Over-the-Air Federated Learning Based on Non-Coherent Detection
Authors: Haifeng Wen, Nicolò Michelusi, Osvaldo Simeone, Hong Xing
Abstract: Over-the-air federated learning (FL), i.e., AirFL, leverages computing primitively over multiple access channels. A long-standing challenge in AirFL is to achieve coherent signal alignment without relying on expensive channel estimation and feedback. This paper proposes NCAirFL, a CSI-free AirFL scheme based on unbiased non-coherent detection at the edge server. By exploiting binary dithering and a long-term memory based error-compensation mechanism, NCAirFL achieves a convergence rate of order $\mathcal{O}(1/\sqrt{T})$ in terms of the average square norm of the gradient for general non-convex and smooth objectives, where $T$ is the number of communication rounds. Experiments demonstrate the competitive performance of NCAirFL compared to vanilla FL with ideal communications and to coherent transmission-based benchmarks.
Submitted: 19 November, 2024; originally announced November 2024.
Comments: 6 pages, 2 figures, submitted for possible publication
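
To make the error-compensation mechanism in the NCAirFL abstract above concrete, here is a minimal sketch, assuming a sign-based one-bit quantizer with a long-term error-feedback memory. It illustrates only the generic compressed-update-with-memory pattern; the function names and the fixed scale are made up for this example, and binary dithering and the non-coherent detection step are deliberately left out.

```python
import numpy as np

def one_bit_with_error_feedback(grad, memory, scale=0.01):
    """Sketch: sign-based (1-bit) compression with long-term error feedback.

    The residual that the coarse message cannot carry is stored in `memory`
    and re-injected at the next round; this memory trick is what typically
    restores O(1/sqrt(T))-type convergence for compressed updates.
    """
    corrected = grad + memory                 # re-inject previously lost signal
    compressed = scale * np.sign(corrected)   # coarse 1-bit message actually sent
    memory = corrected - compressed           # remember what was lost this round
    return compressed, memory

# Toy usage: two workers, the server simply averages the coarse messages.
rng = np.random.default_rng(0)
mem = [np.zeros(4), np.zeros(4)]
grads = [rng.normal(size=4), rng.normal(size=4)]
msgs = []
for i, g in enumerate(grads):
    m, mem[i] = one_bit_with_error_feedback(g, mem[i])
    msgs.append(m)
print("aggregated update:", np.mean(msgs, axis=0))
```
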
2. arXiv:2411.07722 [pdf, other]
Categories: cs.AI (Artificial Intelligence)
Title: Is Cognition consistent with Perception? Assessing and Mitigating Multimodal Knowledge Conflicts in Document Understanding
Authors: Zirui Shao, Chuwei Luo, Zhaoqing Zhu, Hangdi Xing, Zhi Yu, Qi Zheng, Jiajun Bu
Abstract: Multimodal large language models (MLLMs) have shown impressive capabilities in document understanding, a rapidly growing research area with significant industrial demand in recent years. As a multimodal task, document understanding requires models to possess both perceptual and cognitive abilities. However, current MLLMs often face conflicts between perception and cognition. Taking a document VQA task (cognition) as an example, an MLLM might generate answers that do not match the corresponding visual content identified by its OCR (perception). This conflict suggests that the MLLM might struggle to establish an intrinsic connection between the information it "sees" and what it "understands." Such conflicts challenge the intuitive notion that cognition is consistent with perception, hindering the performance and explainability of MLLMs. In this paper, we define the conflicts between cognition and perception as Cognition and Perception (C&P) knowledge conflicts, a form of multimodal knowledge conflicts, and systematically assess them with a focus on document understanding. Our analysis reveals that even GPT-4o, a leading MLLM, achieves only 68.6% C&P consistency. To mitigate the C&P knowledge conflicts, we propose a novel method called Multimodal Knowledge Consistency Fine-tuning. This method first ensures task-specific consistency and then connects the cognitive and perceptual knowledge. Our method significantly reduces C&P knowledge conflicts across all tested MLLMs and enhances their performance in both cognitive and perceptual tasks in most scenarios.
Submitted: 12 November, 2024; originally announced November 2024.
Comments: Preprint
3. arXiv:2410.15044 [pdf, other]
Categories: cs.HC (Human-Computer Interaction)
Title: Adanonymizer: Interactively Navigating and Balancing the Duality of Privacy and Output Performance in Human-LLM Interaction
Authors: Shuning Zhang, Xin Yi, Haobin Xing, Lyumanshan Ye, Yongquan Hu, Hewu Li
Abstract: Current Large Language Models (LLMs) cannot support users to precisely balance privacy protection and output performance during individual consultations. We introduce Adanonymizer, an anonymization plug-in that allows users to control this balance by navigating a trade-off curve. A survey (N=221) revealed a privacy paradox, where users frequently disclosed sensitive information despite acknowledging privacy risks. The study further demonstrated that privacy risks were not significantly correlated with model output performance, highlighting the potential to navigate this trade-off. Adanonymizer normalizes privacy and utility ratings by type and automates the pseudonymization of sensitive terms based on user preferences, significantly reducing user effort. Its 2D color palette interface visualizes the privacy-utility trade-off, allowing users to adjust the balance by manipulating a point. An evaluation (N=36) compared Adanonymizer with ablation methods and differential privacy techniques, where Adanonymizer significantly reduced modification time, achieved better perceived model performance and overall user preference.
Submitted: 19 October, 2024; originally announced October 2024.
4. arXiv:2410.14931 [pdf, other]
Categories: cs.HC (Human-Computer Interaction)
Title: "Ghost of the past": identifying and resolving privacy leakage from LLM's memory through proactive user interaction
Authors: Shuning Zhang, Lyumanshan Ye, Xin Yi, Jingyu Tang, Bo Shui, Haobin Xing, Pengfei Liu, Hewu Li
Abstract: Memories, encompassing past inputs in context window and retrieval-augmented generation (RAG), frequently surface during human-LLM interactions, yet users are often unaware of their presence and the associated privacy risks. To address this, we propose MemoAnalyzer, a system for identifying, visualizing, and managing private information within memories. A semi-structured interview (N=40) revealed that low privacy awareness was the primary challenge, while proactive privacy control emerged as the most common user need. MemoAnalyzer uses a prompt-based method to infer and identify sensitive information from aggregated past inputs, allowing users to easily modify sensitive content. Background color temperature and transparency are mapped to inference confidence and sensitivity, streamlining privacy adjustments. A 5-day evaluation (N=36) comparing MemoAnalyzer with the default GPT setting and a manual modification baseline showed MemoAnalyzer significantly improved privacy awareness and protection without compromising interaction speed. Our study contributes to privacy-conscious LLM design, offering insights into privacy protection for Human-AI interactions.
Submitted: 18 October, 2024; originally announced October 2024.
5. arXiv:2410.10899 [pdf]
Categories: q-bio.QM (Quantitative Methods); cs.AI (Artificial Intelligence)
Title: GPTON: Generative Pre-trained Transformers enhanced with Ontology Narration for accurate annotation of biological data
Authors: Rongbin Li, Wenbo Chen, Jinbo Li, Hanwen Xing, Hua Xu, Zhao Li, W. Jim Zheng
Abstract: By leveraging GPT-4 for ontology narration, we developed GPTON to infuse structured knowledge into LLMs through verbalized ontology terms, achieving accurate text and ontology annotations for over 68% of gene sets in the top five predictions. Manual evaluations confirm GPTON's robustness, highlighting its potential to harness LLMs and structured knowledge to significantly advance biomedical research beyond gene set annotation.
Submitted: 17 October, 2024; v1 submitted 12 October, 2024; originally announced October 2024.
Comments: 25 pages, 6 figures
ACM Class: J.3; I.2.7
6. arXiv:2410.07917 [pdf, ps, other]
Categories: cs.RO (Robotics); cs.CV (Computer Vision and Pattern Recognition)
Title: Understanding Human Activity with Uncertainty Measure for Novelty in Graph Convolutional Networks
Authors: Hao Xing, Darius Burschka
Abstract: Understanding human activity is a crucial aspect of developing intelligent robots, particularly in the domain of human-robot collaboration. Nevertheless, existing systems encounter challenges such as over-segmentation, attributed to errors in the up-sampling process of the decoder. In response, we introduce a promising solution: the Temporal Fusion Graph Convolutional Network. This innovative approach aims to rectify the inadequate boundary estimation of individual actions within an activity stream and mitigate the issue of over-segmentation in the temporal dimension. Moreover, systems leveraging human activity recognition frameworks for decision-making necessitate more than just the identification of actions. They require a confidence value indicative of the certainty regarding the correspondence between observations and training examples. This is crucial to prevent overly confident responses to unforeseen scenarios that were not part of the training data and may have resulted in mismatches due to weak similarity measures within the system. To address this, we propose the incorporation of a Spectral Normalized Residual connection aimed at enhancing efficient estimation of novelty in observations. This innovative approach ensures the preservation of input distance within the feature space by imposing constraints on the maximum gradients of weight updates. By limiting these gradients, we promote a more robust handling of novel situations, thereby mitigating the risks associated with overconfidence. Our methodology involves the use of a Gaussian process to quantify the distance in feature space.
Submitted: 10 October, 2024; originally announced October 2024.
Comments: 15 pages, 10 figures, The International Journal of Robotics Research
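
The spectral-normalized residual connection described in the abstract above can be illustrated with a short PyTorch sketch. This is a generic distance-preserving block built with torch.nn.utils.spectral_norm, not the authors' Temporal Fusion GCN; the layer width and the toy comparison at the end are assumptions made for the example.

```python
import torch
import torch.nn as nn

class SpectralNormResidualBlock(nn.Module):
    """Residual block whose linear map has its spectral norm constrained.

    Bounding the spectral norm of the weight, together with the identity
    shortcut, keeps the mapping roughly distance-preserving, which is the
    property distance-aware novelty/uncertainty estimators rely on.
    """
    def __init__(self, dim: int):
        super().__init__()
        self.fc = nn.utils.spectral_norm(nn.Linear(dim, dim))
        self.act = nn.ReLU()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return x + self.act(self.fc(x))  # identity path preserves input geometry

# Toy check: feature-space distances stay on the order of input distances.
block = SpectralNormResidualBlock(16)
a, b = torch.randn(16), torch.randn(16)
print(torch.dist(a, b).item(), torch.dist(block(a), block(b)).item())
```

A Gaussian-process or other distance-based head would then score novelty on top of such features, in the spirit of the abstract.
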
7. arXiv:2410.07912 [pdf, ps, other]
Categories: cs.CV (Computer Vision and Pattern Recognition); cs.RO (Robotics)
Title: Understanding Spatio-Temporal Relations in Human-Object Interaction using Pyramid Graph Convolutional Network
Authors: Hao Xing, Darius Burschka
Abstract: Human activities recognition is an important task for an intelligent robot, especially in the field of human-robot collaboration, it requires not only the label of sub-activities but also the temporal structure of the activity. In order to automatically recognize both the label and the temporal structure in sequence of human-object interaction, we propose a novel Pyramid Graph Convolutional Network (PGCN), which employs a pyramidal encoder-decoder architecture consisting of an attention based graph convolution network and a temporal pyramid pooling module for downsampling and upsampling interaction sequence on the temporal axis, respectively. The system represents the 2D or 3D spatial relation of human and objects from the detection results in video data as a graph. To learn the human-object relations, a new attention graph convolutional network is trained to extract condensed information from the graph representation. To segment action into sub-actions, a novel temporal pyramid pooling module is proposed, which upsamples compressed features back to the original time scale and classifies actions per frame. We explore various attention layers, namely spatial attention, temporal attention and channel attention, and combine different upsampling decoders to test the performance on action recognition and segmentation. We evaluate our model on two challenging datasets in the field of human-object interaction recognition, i.e. Bimanual Actions and IKEA Assembly datasets. We demonstrate that our classifier significantly improves both framewise action recognition and segmentation, e.g., F1 micro and F1@50 scores on Bimanual Actions dataset are improved by $4.3\%$ and $8.5\%$ respectively.
Submitted: 10 October, 2024; originally announced October 2024.
Comments: 7 pages, 6 figures, IROS 2022 conference
8. arXiv:2410.03530 [pdf, other]
Categories: cs.NE (Neural and Evolutionary Computing)
Title: PRF: Parallel Resonate and Fire Neuron for Long Sequence Learning in Spiking Neural Networks
Authors: Yulong Huang, Zunchang Liu, Changchun Feng, Xiaopeng Lin, Hongwei Ren, Haotian Fu, Yue Zhou, Hong Xing, Bojun Cheng
Abstract: Recently, there is growing demand for effective and efficient long sequence modeling, with State Space Models (SSMs) proving to be effective for long sequence tasks. To further reduce energy consumption, SSMs can be adapted to Spiking Neural Networks (SNNs) using spiking functions. However, current spiking-formalized SSMs approaches still rely on float-point matrix-vector multiplication during inference, undermining SNNs' energy advantage. In this work, we address the efficiency and performance challenges of long sequence learning in SNNs simultaneously. First, we propose a decoupled reset method for parallel spiking neuron training, reducing the typical Leaky Integrate-and-Fire (LIF) model's training time from $O(L^2)$ to $O(L\log L)$, effectively speeding up the training by $6.57 \times$ to $16.50 \times$ on sequence lengths $1,024$ to $32,768$. To our best knowledge, this is the first time that parallel computation with a reset mechanism is implemented achieving equivalence to its sequential counterpart. Secondly, to capture long-range dependencies, we propose a Parallel Resonate and Fire (PRF) neuron, which leverages an oscillating membrane potential driven by a resonate mechanism from a differentiable reset function in the complex domain. The PRF enables efficient long sequence learning while maintaining parallel training. Finally, we demonstrate that the proposed spike-driven architecture using PRF achieves performance comparable to Structured SSMs (S4), with two orders of magnitude reduction in energy consumption, outperforming Transformer on Long Range Arena tasks.
Submitted: 29 October, 2024; v1 submitted 4 October, 2024; originally announced October 2024.
Comments: arXiv admin note: text overlap with arXiv:2208.04933 by other authors
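
As a rough illustration of the scaling argument in the PRF abstract above, the sketch below contrasts a sequential LIF neuron, whose reset inside the recurrence forces step-by-step training, with reset-free leaky integration written as a convolution, which an FFT can evaluate in O(L log L). The paper's actual decoupled-reset equivalence is more involved and is not reproduced here; all parameter values are illustrative.

```python
import numpy as np

def lif_sequential(x, beta=0.9, v_th=1.0):
    """Reference Leaky Integrate-and-Fire with hard reset; inherently sequential."""
    v, spikes = 0.0, np.zeros_like(x)
    for t, xt in enumerate(x):
        v = beta * v + xt
        if v >= v_th:
            spikes[t] = 1.0
            v -= v_th          # reset is coupled to the recurrence
    return spikes

def leaky_integration_parallel(x, beta=0.9):
    """Reset-free leaky integration: v[t] = sum_k beta**k * x[t-k].

    Written as a convolution with a geometric kernel; np.convolve is used here
    for clarity, while an FFT-based convolution computes the same result in
    O(L log L) and needs no step-by-step recurrence.
    """
    L = len(x)
    kernel = beta ** np.arange(L)
    return np.convolve(x, kernel)[:L]

x = np.random.default_rng(1).normal(size=8)
print(lif_sequential(x))
print(leaky_integration_parallel(x))
```
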
9. arXiv:2408.04649 [pdf, other]
Categories: cs.CL (Computation and Language); cs.AI (Artificial Intelligence)
Title: Chain of Stance: Stance Detection with Large Language Models
Authors: Junxia Ma, Changjiang Wang, Hanwen Xing, Dongming Zhao, Yazhou Zhang
Abstract: Stance detection is an active task in natural language processing (NLP) that aims to identify the author's stance towards a particular target within a text. Given the remarkable language understanding capabilities and encyclopedic prior knowledge of large language models (LLMs), how to explore the potential of LLMs in stance detection has received significant attention. Unlike existing LLM-based approaches that focus solely on fine-tuning with large-scale datasets, we propose a new prompting method, called Chain of Stance (CoS). In particular, it positions LLMs as expert stance detectors by decomposing the stance detection process into a series of intermediate, stance-related assertions that culminate in the final judgment. This approach leads to significant improvements in classification performance. We conducted extensive experiments using four SOTA LLMs on the SemEval 2016 dataset, covering the zero-shot and few-shot learning setups. The results indicate that the proposed method achieves state-of-the-art results with an F1 score of 79.84 in the few-shot setting.
Submitted: 3 August, 2024; originally announced August 2024.
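
The staged prompting idea behind Chain of Stance can be sketched as a short chain of intermediate queries whose answers feed a final stance judgment. The intermediate questions and the ask_llm stub below are illustrative placeholders invented for this sketch, not the prompts or models used in the paper.

```python
def ask_llm(prompt: str) -> str:
    # Stub: replace with a real chat-completion call (any LLM client works here).
    return "stub answer"

def chain_of_stance(text: str, target: str) -> str:
    """Decompose stance detection into intermediate assertions, then decide."""
    steps = [
        f"Summarize the attitude-relevant claims made in: {text}",
        f"List explicit or implicit opinions about '{target}' in those claims.",
        f"Assess whether the evidence is supportive, opposing, or neutral toward '{target}'.",
    ]
    context = ""
    for step in steps:
        # Accumulate the intermediate, stance-related assertions as context.
        context += "\n" + ask_llm(context + "\n" + step)
    return ask_llm(context + f"\nFinal stance toward '{target}' (FAVOR / AGAINST / NONE):")

print(chain_of_stance("The new tariff will hurt small businesses.", "the tariff"))
```
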
10. arXiv:2408.02047 [pdf, other]
Categories: eess.SY (Systems and Control); cs.AI (Artificial Intelligence)
Title: Latency-Aware Resource Allocation for Mobile Edge Generation and Computing via Deep Reinforcement Learning
Authors: Yinyu Wu, Xuhui Zhang, Jinke Ren, Huijun Xing, Yanyan Shen, Shuguang Cui
Abstract: Recently, the integration of mobile edge computing (MEC) and generative artificial intelligence (GAI) technology has given rise to a new area called mobile edge generation and computing (MEGC), which offers mobile users heterogeneous services such as task computing and content generation. In this letter, we investigate the joint communication, computation, and the AIGC resource allocation problem in an MEGC system. A latency minimization problem is first formulated to enhance the quality of service for mobile users. Due to the strong coupling of the optimization variables, we propose a new deep reinforcement learning-based algorithm to solve it efficiently. Numerical results demonstrate that the proposed algorithm can achieve lower latency than two baseline algorithms.
Submitted: 19 October, 2024; v1 submitted 4 August, 2024; originally announced August 2024.
Comments: 5 pages, 6 figures. This paper has been accepted for publication by IEEE Networking Letters
11. arXiv:2407.19721 [pdf, other]
Categories: cs.NI (Networking and Internet Architecture); cs.AI (Artificial Intelligence); cs.DC (Distributed, Parallel, and Cluster Computing)
Title: Rina: Enhancing Ring-AllReduce with In-network Aggregation in Distributed Model Training
Authors: Zixuan Chen, Xuandong Liu, Minglin Li, Yinfan Hu, Hao Mei, Huifeng Xing, Hao Wang, Wanxin Shi, Sen Liu, Yang Xu
Abstract: Parameter Server (PS) and Ring-AllReduce (RAR) are two widely utilized synchronization architectures in multi-worker Deep Learning (DL), also referred to as Distributed Deep Learning (DDL). However, PS encounters challenges with the "incast" issue, while RAR struggles with problems caused by the long dependency chain. The emerging In-network Aggregation (INA) has been proposed to integrate with PS to mitigate its incast issue. However, such PS-based INA has poor incremental deployment abilities as it requires replacing all the switches to show significant performance improvement, which is not cost-effective. In this study, we present the incorporation of INA capabilities into RAR, called RAR with In-Network Aggregation (Rina), to tackle both the problems above. Rina features its agent-worker mechanism. When an INA-capable ToR switch is deployed, all workers in this rack run as one abstracted worker with the help of the agent, resulting in both excellent incremental deployment capabilities and better throughput. We conducted extensive testbed and simulation evaluations to substantiate the throughput advantages of Rina over existing DDL training synchronization structures. Compared with the state-of-the-art PS-based INA methods ATP, Rina can achieve more than 50% throughput with the same hardware cost.
Submitted: 29 July, 2024; originally announced July 2024.
Comments: To appear in ICNP 2024. Preview version only
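
For context on the "long dependency chain" mentioned in the Rina abstract above, here is a toy single-process simulation of textbook Ring-AllReduce, i.e. a reduce-scatter followed by an all-gather over 2(N-1) sequential steps. It models plain RAR only; Rina's agent-worker and in-network aggregation mechanisms are not represented.

```python
import numpy as np

def ring_allreduce(tensors):
    """Toy simulation of Ring-AllReduce (reduce-scatter + all-gather).

    Each of the N workers forwards one chunk per step; the 2*(N-1) sequential
    steps form the dependency chain that grows with the ring size.
    """
    n = len(tensors)
    chunks = [np.array_split(t.astype(float), n) for t in tensors]
    # Reduce-scatter: after n-1 steps, worker i holds the full sum of chunk (i+1) % n.
    for step in range(n - 1):
        for i in range(n):
            src = (i - step) % n
            chunks[(i + 1) % n][src] += chunks[i][src]
    # All-gather: circulate the reduced chunks so every worker ends with every sum.
    for step in range(n - 1):
        for i in range(n):
            src = (i + 1 - step) % n
            chunks[(i + 1) % n][src] = chunks[i][src]
    return [np.concatenate(c) for c in chunks]

workers = [np.arange(8) * (w + 1) for w in range(4)]
print(ring_allreduce(workers)[0])   # every worker ends with the elementwise sum
```
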
12. arXiv:2407.15502 [pdf, other]
Categories: cs.CV (Computer Vision and Pattern Recognition)
Title: WebRPG: Automatic Web Rendering Parameters Generation for Visual Presentation
Authors: Zirui Shao, Feiyu Gao, Hangdi Xing, Zepeng Zhu, Zhi Yu, Jiajun Bu, Qi Zheng, Cong Yao
Abstract: In the era of content creation revolution propelled by advancements in generative models, the field of web design remains unexplored despite its critical role in modern digital communication. The web design process is complex and often time-consuming, especially for those with limited expertise. In this paper, we introduce Web Rendering Parameters Generation (WebRPG), a new task that aims at automating the generation for visual presentation of web pages based on their HTML code. WebRPG would contribute to a faster web development workflow. Since there is no existing benchmark available, we develop a new dataset for WebRPG through an automated pipeline. Moreover, we present baseline models, utilizing VAE to manage numerous elements and rendering parameters, along with custom HTML embedding for capturing essential semantic and hierarchical information from HTML. Extensive experiments, including customized quantitative evaluations for this specific task, are conducted to evaluate the quality of the generated results.
Submitted: 22 July, 2024; originally announced July 2024.
Comments: Accepted at ECCV 2024. The dataset and code can be accessed at https://github.com/AlibabaResearch/AdvancedLiterateMachinery/tree/main/DocumentUnderstanding/WebRPG
The dataset and code can be accessed at https://github.com/AlibabaResearch/AdvancedLiterateMachinery/tree/main/DocumentUnderstanding/WebRPG</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.07245">arXiv:2407.07245</a> <span> [<a href="https://arxiv.org/pdf/2407.07245">pdf</a>, <a href="https://arxiv.org/format/2407.07245">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Accelerating Mobile Edge Generation (MEG) by Constrained Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+X">Xiaoxia Xu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yuanwei Liu</a>, <a href="/search/cs?searchtype=author&query=Mu%2C+X">Xidong Mu</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hong Xing</a>, <a href="/search/cs?searchtype=author&query=Nallanathan%2C+A">Arumugam Nallanathan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.07245v2-abstract-short" style="display: inline;"> A novel accelerated mobile edge generation (MEG) framework is proposed for generating high-resolution images on mobile devices. Exploiting a large-scale latent diffusion model (LDM) distributed across edge server (ES) and user equipment (UE), cost-efficient artificial intelligence generated content (AIGC) is achieved by transmitting low-dimensional features between ES and UE. To reduce overheads o… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.07245v2-abstract-full').style.display = 'inline'; document.getElementById('2407.07245v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.07245v2-abstract-full" style="display: none;"> A novel accelerated mobile edge generation (MEG) framework is proposed for generating high-resolution images on mobile devices. Exploiting a large-scale latent diffusion model (LDM) distributed across edge server (ES) and user equipment (UE), cost-efficient artificial intelligence generated content (AIGC) is achieved by transmitting low-dimensional features between ES and UE. To reduce overheads of both distributed computations and transmissions, a dynamic diffusion and feature merging scheme is conceived. By jointly optimizing the denoising steps and feature merging ratio, the image generation quality is maximized subject to latency and energy consumption constraints. To address this problem and tailor LDM sub-models, a low-complexity MEG acceleration protocol is developed. Particularly, a backbone meta-architecture is trained via offline distillation. Then, dynamic diffusion and feature merging are determined in online channel environment, which can be viewed as a constrained Markov Decision Process (MDP). A constrained variational policy optimization (CVPO) based MEG algorithm is further proposed for constraint-guaranteed learning, namely MEG-CVPO. 
Numerical results verify that: 1) The proposed framework can generate 1024$\times$1024 high-quality images over noisy channels while reducing latency by over $40\%$ compared to conventional generation schemes. 2) The developed MEG-CVPO effectively mitigates constraint violations, thus flexibly controlling the trade-off between image distortion and generation costs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.07245v2-abstract-full').style.display = 'none'; document.getElementById('2407.07245v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">30 pages, 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.18100">arXiv:2406.18100</a> <span> [<a href="https://arxiv.org/pdf/2406.18100">pdf</a>, <a href="https://arxiv.org/format/2406.18100">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Natural Language but Omitted? On the Ineffectiveness of Large Language Models' privacy policy from End-users' Perspective </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Shuning Zhang</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Haobin Xing</a>, <a href="/search/cs?searchtype=author&query=Yi%2C+X">Xin Yi</a>, <a href="/search/cs?searchtype=author&query=Li%2C+H">Hewu Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.18100v1-abstract-short" style="display: inline;"> LLM-driven products are increasingly prevalent in our daily lives. With a natural-language-based interaction style, people may potentially leak their personal private information. Thus, privacy policies and user agreements play an important role in regulating and alerting users. However, there is little work examining how people read LLMs' privacy policies. Thus, we conducted the first user study… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.18100v1-abstract-full').style.display = 'inline'; document.getElementById('2406.18100v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.18100v1-abstract-full" style="display: none;"> LLM-driven products are increasingly prevalent in our daily lives. With a natural-language-based interaction style, people may potentially leak their personal private information. Thus, privacy policies and user agreements play an important role in regulating and alerting users. However, there is little work examining how people read LLMs' privacy policies. Thus, we conducted the first user study in which participants read the privacy policy and user agreement in two different styles (a cursory and a detailed style).
We found that users miss important information upon cursory reading and even upon detailed reading. Moreover, their privacy concerns were not resolved even after detailed reading. We provide four design implications based on these findings. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.18100v1-abstract-full').style.display = 'none'; document.getElementById('2406.18100v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.11569">arXiv:2406.11569</a> <span> [<a href="https://arxiv.org/pdf/2406.11569">pdf</a>, <a href="https://arxiv.org/format/2406.11569">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Pre-Training and Personalized Fine-Tuning via Over-the-Air Federated Meta-Learning: Convergence-Generalization Trade-Offs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wen%2C+H">Haifeng Wen</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hong Xing</a>, <a href="/search/cs?searchtype=author&query=Simeone%2C+O">Osvaldo Simeone</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.11569v3-abstract-short" style="display: inline;"> For modern artificial intelligence (AI) applications such as large language models (LLMs), the training paradigm has recently shifted to pre-training followed by fine-tuning. Furthermore, owing to dwindling open repositories of data and thanks to efforts to democratize access to AI models, pre-training is expected to increasingly migrate from the current centralized deployments to federated learni… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.11569v3-abstract-full').style.display = 'inline'; document.getElementById('2406.11569v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.11569v3-abstract-full" style="display: none;"> For modern artificial intelligence (AI) applications such as large language models (LLMs), the training paradigm has recently shifted to pre-training followed by fine-tuning. Furthermore, owing to dwindling open repositories of data and thanks to efforts to democratize access to AI models, pre-training is expected to increasingly migrate from the current centralized deployments to federated learning (FL) implementations. Meta-learning provides a general framework in which pre-training and fine-tuning can be formalized. Meta-learning-based personalized FL (meta-pFL) moves beyond basic personalization by targeting generalization to new agents and tasks.
This paper studies the generalization performance of meta-pFL for a wireless setting in which the agents participating in the pre-training phase, i.e., meta-learning, are connected via a shared wireless channel to the server. Adopting over-the-air computing, we study the trade-off between generalization to new agents and tasks, on the one hand, and convergence, on the other hand. The trade-off arises from the fact that channel impairments may enhance generalization, while degrading convergence. Extensive numerical results validate the theory. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.11569v3-abstract-full').style.display = 'none'; document.getElementById('2406.11569v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">39 pages, 7 figures, submitted for possible journal publication</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.08754">arXiv:2406.08754</a> <span> [<a href="https://arxiv.org/pdf/2406.08754">pdf</a>, <a href="https://arxiv.org/format/2406.08754">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Exploiting Uncommon Text-Encoded Structures for Automated Jailbreaks in LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+B">Bangxin Li</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hengrui Xing</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+C">Chao Huang</a>, <a href="/search/cs?searchtype=author&query=Qian%2C+J">Jin Qian</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+H">Huangqing Xiao</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+L">Linfeng Feng</a>, <a href="/search/cs?searchtype=author&query=Tian%2C+C">Cong Tian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.08754v2-abstract-short" style="display: inline;"> Large Language Models (LLMs) are widely used in natural language processing but face the risk of jailbreak attacks that maliciously induce them to generate harmful content. Existing jailbreak attacks, including character-level and context-level attacks, mainly focus on the prompt of the plain text without specifically exploring the significant influence of its structure. 
In this paper, we focus on… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.08754v2-abstract-full').style.display = 'inline'; document.getElementById('2406.08754v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.08754v2-abstract-full" style="display: none;"> Large Language Models (LLMs) are widely used in natural language processing but face the risk of jailbreak attacks that maliciously induce them to generate harmful content. Existing jailbreak attacks, including character-level and context-level attacks, mainly focus on the prompt of the plain text without specifically exploring the significant influence of its structure. In this paper, we focus on studying how prompt structure contributes to the jailbreak attack. We introduce a novel structure-level attack method based on tail structures that are rarely used during LLM training, which we refer to as Uncommon Text-Encoded Structure (UTES). We extensively study 12 UTESs templates and 6 obfuscation methods to build an effective automated jailbreak tool named StructuralSleight that contains three escalating attack strategies: Structural Attack, Structural and Character/Context Obfuscation Attack, and Fully Obfuscated Structural Attack. Extensive experiments on existing LLMs show that StructuralSleight significantly outperforms baseline methods. In particular, the attack success rate reaches 94.62\% on GPT-4o, which has not been addressed by state-of-the-art techniques. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.08754v2-abstract-full').style.display = 'none'; document.getElementById('2406.08754v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.02266">arXiv:2406.02266</a> <span> [<a href="https://arxiv.org/pdf/2406.02266">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Enhancing Retrieval-Augmented LMs with a Two-stage Consistency Learning Compressor </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+C">Chuankai Xu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+D">Dongming Zhao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+B">Bo Wang</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hanwen Xing</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.02266v1-abstract-short" style="display: inline;"> Despite the prevalence of retrieval-augmented language models (RALMs), the seamless integration of these models with retrieval mechanisms to enhance performance in document-based tasks remains challenging. While some post-retrieval processing Retrieval-Augmented Generation (RAG) methods have achieved success, most still lack the ability to distinguish pertinent from extraneous information, leading… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.02266v1-abstract-full').style.display = 'inline'; document.getElementById('2406.02266v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.02266v1-abstract-full" style="display: none;"> Despite the prevalence of retrieval-augmented language models (RALMs), the seamless integration of these models with retrieval mechanisms to enhance performance in document-based tasks remains challenging. While some post-retrieval processing Retrieval-Augmented Generation (RAG) methods have achieved success, most still lack the ability to distinguish pertinent from extraneous information, leading to potential inconsistencies and reduced precision in the generated output, which subsequently affects the truthfulness of the language model's responses. To address these limitations, this work proposes a novel two-stage consistency learning approach for retrieved information compression in retrieval-augmented language models to enhance performance. By incorporating consistency learning, the aim is to generate summaries that maintain coherence and alignment with the intended semantic representations of a teacher model while improving faithfulness to the original retrieved documents. The proposed method is empirically validated across multiple datasets, demonstrating notable enhancements in precision and efficiency for question-answering tasks. It outperforms existing baselines and showcases the synergistic effects of combining contrastive and consistency learning paradigms within the retrieval-augmented generation framework. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.02266v1-abstract-full').style.display = 'none'; document.getElementById('2406.02266v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.14709">arXiv:2405.14709</a> <span> [<a href="https://arxiv.org/pdf/2405.14709">pdf</a>, <a href="https://arxiv.org/format/2405.14709">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> </div> </div> <p class="title is-5 mathjax"> OpFlowTalker: Realistic and Natural Talking Face Generation via Optical Flow Guidance </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ge%2C+S">Shuheng Ge</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Haoyu Xing</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Li Zhang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+X">Xiangqian Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.14709v2-abstract-short" style="display: inline;"> Creating realistic, natural, and lip-readable talking face videos remains a formidable challenge. Previous research primarily concentrated on generating and aligning single-frame images while overlooking the smoothness of frame-to-frame transitions and temporal dependencies. This often compromised visual quality and effects in practical settings, particularly when handling complex facial data and… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.14709v2-abstract-full').style.display = 'inline'; document.getElementById('2405.14709v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.14709v2-abstract-full" style="display: none;"> Creating realistic, natural, and lip-readable talking face videos remains a formidable challenge. Previous research primarily concentrated on generating and aligning single-frame images while overlooking the smoothness of frame-to-frame transitions and temporal dependencies. This often compromised visual quality and effects in practical settings, particularly when handling complex facial data and audio content, which frequently led to semantically incongruent visual illusions. Specifically, synthesized videos commonly featured disorganized lip movements, making them difficult to understand and recognize. To overcome these limitations, this paper introduces the application of optical flow to guide facial image generation, enhancing inter-frame continuity and semantic consistency. We propose "OpFlowTalker", a novel approach that utilizes predicted optical flow changes from audio inputs rather than direct image predictions. This method smooths image transitions and aligns changes with semantic content. 
Moreover, it employs a sequence fusion technique to replace the independent generation of single frames, thus preserving contextual information and maintaining temporal coherence. We also developed an optical flow synchronization module that regulates both full-face and lip movements, optimizing visual synthesis by balancing regional dynamics. Furthermore, we introduce a Visual Text Consistency Score (VTCS) that accurately measures lip-readability in synthesized videos. Extensive empirical evidence validates the effectiveness of our approach. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.14709v2-abstract-full').style.display = 'none'; document.getElementById('2405.14709v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.00736">arXiv:2405.00736</a> <span> [<a href="https://arxiv.org/pdf/2405.00736">pdf</a>, <a href="https://arxiv.org/format/2405.00736">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Joint Signal Detection and Automatic Modulation Classification via Deep Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xing%2C+H">Huijun Xing</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xuhui Zhang</a>, <a href="/search/cs?searchtype=author&query=Chang%2C+S">Shuo Chang</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+J">Jinke Ren</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zixun Zhang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+J">Jie Xu</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+S">Shuguang Cui</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.00736v1-abstract-short" style="display: inline;"> Signal detection and modulation classification are two crucial tasks in various wireless communication systems. Different from prior works that investigate them independently, this paper studies the joint signal detection and automatic modulation classification (AMC) by considering a realistic and complex scenario, in which multiple signals with different modulation schemes coexist at different ca… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.00736v1-abstract-full').style.display = 'inline'; document.getElementById('2405.00736v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.00736v1-abstract-full" style="display: none;"> Signal detection and modulation classification are two crucial tasks in various wireless communication systems. 
Different from prior works that investigate them independently, this paper studies the joint signal detection and automatic modulation classification (AMC) by considering a realistic and complex scenario, in which multiple signals with different modulation schemes coexist at different carrier frequencies. We first generate a coexisting RADIOML dataset (CRML23) to facilitate the joint design. Different from the publicly available AMC dataset ignoring the signal detection step and containing only one signal, our synthetic dataset covers the more realistic multiple-signal coexisting scenario. Then, we present a joint framework for detection and classification (JDM) for such a multiple-signal coexisting environment, which consists of two modules for signal detection and AMC, respectively. In particular, these two modules are interconnected using a designated data structure called "proposal". Finally, we conduct extensive simulations over the newly developed dataset, which demonstrate the effectiveness of our designs. Our code and dataset are now available as open-source (https://github.com/Singingkettle/ChangShuoRadioData). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.00736v1-abstract-full').style.display = 'none'; document.getElementById('2405.00736v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.16891">arXiv:2404.16891</a> <span> [<a href="https://arxiv.org/pdf/2404.16891">pdf</a>, <a href="https://arxiv.org/format/2404.16891">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Attacks on Third-Party APIs of Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhao%2C+W">Wanru Zhao</a>, <a href="/search/cs?searchtype=author&query=Khazanchi%2C+V">Vidit Khazanchi</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Haodi Xing</a>, <a href="/search/cs?searchtype=author&query=He%2C+X">Xuanli He</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Q">Qiongkai Xu</a>, <a href="/search/cs?searchtype=author&query=Lane%2C+N+D">Nicholas Donald Lane</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.16891v1-abstract-short" style="display: inline;"> Large language model (LLM) services have recently begun offering a plugin ecosystem to interact with third-party API services. This innovation enhances the capabilities of LLMs, but it also introduces risks, as these plugins developed by various third parties cannot be easily trusted. 
This paper proposes a new attacking framework to examine security and safety vulnerabilities within LLM platforms… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.16891v1-abstract-full').style.display = 'inline'; document.getElementById('2404.16891v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.16891v1-abstract-full" style="display: none;"> Large language model (LLM) services have recently begun offering a plugin ecosystem to interact with third-party API services. This innovation enhances the capabilities of LLMs, but it also introduces risks, as these plugins developed by various third parties cannot be easily trusted. This paper proposes a new attacking framework to examine security and safety vulnerabilities within LLM platforms that incorporate third-party services. Applying our framework specifically to widely used LLMs, we identify real-world malicious attacks across various domains on third-party APIs that can imperceptibly modify LLM outputs. The paper discusses the unique challenges posed by third-party API integration and offers strategic possibilities to improve the security and safety of LLM ecosystems moving forward. Our code is released at https://github.com/vk0812/Third-Party-Attacks-on-LLMs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.16891v1-abstract-full').style.display = 'none'; document.getElementById('2404.16891v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICLR 2024 Workshop on Secure and Trustworthy Large Language Models</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.14656">arXiv:2401.14656</a> <span> [<a href="https://arxiv.org/pdf/2401.14656">pdf</a>, <a href="https://arxiv.org/format/2401.14656">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Scientific Large Language Models: A Survey on Biological & Chemical Domains </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Q">Qiang Zhang</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+K">Keyang Ding</a>, <a href="/search/cs?searchtype=author&query=Lyv%2C+T">Tianwen Lyv</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xinda Wang</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+Q">Qingyu Yin</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yiwen Zhang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+J">Jing Yu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yuhao Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiaotong Li</a>, <a href="/search/cs?searchtype=author&query=Xiang%2C+Z">Zhuoyi Xiang</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+K">Kehua Feng</a>, <a href="/search/cs?searchtype=author&query=Zhuang%2C+X">Xiang Zhuang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zeyuan Wang</a>, <a href="/search/cs?searchtype=author&query=Qin%2C+M">Ming Qin</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+M">Mengyao Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jinlu Zhang</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+J">Jiyu Cui</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+T">Tao Huang</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+P">Pengju Yan</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+R">Renjun Xu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+H">Hongyang Chen</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiaolin Li</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+X">Xiaohui Fan</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Huabin Xing</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+H">Huajun Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.14656v2-abstract-short" style="display: inline;"> Large Language Models (LLMs) have emerged as a transformative power in enhancing natural language comprehension, representing a significant stride toward artificial general intelligence. The application of LLMs extends beyond conventional linguistic boundaries, encompassing specialized linguistic systems developed within various scientific disciplines. 
This growing interest has led to the advent o… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.14656v2-abstract-full').style.display = 'inline'; document.getElementById('2401.14656v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.14656v2-abstract-full" style="display: none;"> Large Language Models (LLMs) have emerged as a transformative power in enhancing natural language comprehension, representing a significant stride toward artificial general intelligence. The application of LLMs extends beyond conventional linguistic boundaries, encompassing specialized linguistic systems developed within various scientific disciplines. This growing interest has led to the advent of scientific LLMs, a novel subclass specifically engineered for facilitating scientific discovery. As a burgeoning area in the community of AI for Science, scientific LLMs warrant comprehensive exploration. However, a systematic and up-to-date survey introducing them is currently lacking. In this paper, we endeavor to methodically delineate the concept of "scientific language", whilst providing a thorough review of the latest advancements in scientific LLMs. Given the expansive realm of scientific disciplines, our analysis adopts a focused lens, concentrating on the biological and chemical domains. This includes an in-depth examination of LLMs for textual knowledge, small molecules, macromolecular proteins, genomic sequences, and their combinations, analyzing them in terms of model architectures, capabilities, datasets, and evaluation. Finally, we critically examine the prevailing challenges and point out promising research directions along with the advances of LLMs. By offering a comprehensive overview of technical developments in this field, this survey aspires to be an invaluable resource for researchers navigating the intricate landscape of scientific LLMs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.14656v2-abstract-full').style.display = 'none'; document.getElementById('2401.14656v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.01522">arXiv:2401.01522</a> <span> [<a href="https://arxiv.org/pdf/2401.01522">pdf</a>, <a href="https://arxiv.org/format/2401.01522">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> LORE++: Logical Location Regression Network for Table Structure Recognition with Pre-training </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Long%2C+R">Rujiao Long</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hangdi Xing</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Z">Zhibo Yang</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Q">Qi Zheng</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+Z">Zhi Yu</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+C">Cong Yao</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+F">Fei Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.01522v1-abstract-short" style="display: inline;"> Table structure recognition (TSR) aims at extracting tables in images into machine-understandable formats. Recent methods solve this problem by predicting the adjacency relations of detected cell boxes or learning to directly generate the corresponding markup sequences from the table images. However, existing approaches either count on additional heuristic rules to recover the table structures, or… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.01522v1-abstract-full').style.display = 'inline'; document.getElementById('2401.01522v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.01522v1-abstract-full" style="display: none;"> Table structure recognition (TSR) aims at extracting tables in images into machine-understandable formats. Recent methods solve this problem by predicting the adjacency relations of detected cell boxes or learning to directly generate the corresponding markup sequences from the table images. However, existing approaches either count on additional heuristic rules to recover the table structures, or face challenges in capturing long-range dependencies within tables, resulting in increased complexity. In this paper, we propose an alternative paradigm. We model TSR as a logical location regression problem and propose a new TSR framework called LORE, standing for LOgical location REgression network, which for the first time regresses logical location as well as spatial location of table cells in a unified network. Our proposed LORE is conceptually simpler, easier to train, and more accurate than other paradigms of TSR. Moreover, inspired by the persuasive success of pre-trained models on a number of computer vision and natural language processing tasks, we propose two pre-training tasks to enrich the spatial and logical representations at the feature level of LORE, resulting in an upgraded version called LORE++. 
The incorporation of pre-training in LORE++ has proven to enjoy significant advantages, leading to a substantial enhancement in terms of accuracy, generalization, and few-shot capability compared to its predecessor. Experiments on standard benchmarks against methods of previous paradigms demonstrate the superiority of LORE++, which highlights the potential and promising prospect of the logical location regression paradigm for TSR. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.01522v1-abstract-full').style.display = 'none'; document.getElementById('2401.01522v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: substantial text overlap with arXiv:2303.03730</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.16606">arXiv:2310.16606</a> <span> [<a href="https://arxiv.org/pdf/2310.16606">pdf</a>, <a href="https://arxiv.org/ps/2310.16606">ps</a>, <a href="https://arxiv.org/format/2310.16606">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> AirFL-Mem: Improving Communication-Learning Trade-Off by Long-Term Memory </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wen%2C+H">Haifeng Wen</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hong Xing</a>, <a href="/search/cs?searchtype=author&query=Simeone%2C+O">Osvaldo Simeone</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.16606v2-abstract-short" style="display: inline;"> Addressing the communication bottleneck inherent in federated learning (FL), over-the-air FL (AirFL) has emerged as a promising solution, which is, however, hampered by deep fading conditions. In this paper, we propose AirFL-Mem, a novel scheme designed to mitigate the impact of deep fading by implementing a \emph{long-term} memory mechanism. Convergence bounds are provided that account for long-t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.16606v2-abstract-full').style.display = 'inline'; document.getElementById('2310.16606v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.16606v2-abstract-full" style="display: none;"> Addressing the communication bottleneck inherent in federated learning (FL), over-the-air FL (AirFL) has emerged as a promising solution, which is, however, hampered by deep fading conditions. In this paper, we propose AirFL-Mem, a novel scheme designed to mitigate the impact of deep fading by implementing a \emph{long-term} memory mechanism. 
Convergence bounds are provided that account for long-term memory, as well as for existing AirFL variants with short-term memory, for general non-convex objectives. The theory demonstrates that AirFL-Mem exhibits the same convergence rate of federated averaging (FedAvg) with ideal communication, while the performance of existing schemes is generally limited by error floors. The theoretical results are also leveraged to propose a novel convex optimization strategy for the truncation threshold used for power control in the presence of Rayleigh fading channels. Experimental results validate the analysis, confirming the advantages of a long-term memory mechanism for the mitigation of deep fading. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.16606v2-abstract-full').style.display = 'none'; document.getElementById('2310.16606v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 3 figures, submitted for possible publication</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.09533">arXiv:2310.09533</a> <span> [<a href="https://arxiv.org/pdf/2310.09533">pdf</a>, <a href="https://arxiv.org/format/2310.09533">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Towards End-to-End Unsupervised Saliency Detection with Self-Supervised Top-Down Context </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Song%2C+Y">Yicheng Song</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+S">Shuyong Gao</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Haozhe Xing</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+Y">Yiting Cheng</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yan Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Wenqiang Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.09533v1-abstract-short" style="display: inline;"> Unsupervised salient object detection aims to detect salient objects without using supervision signals eliminating the tedious task of manually labeling salient objects. To improve training efficiency, end-to-end methods for USOD have been proposed as a promising alternative. 
However, current solutions rely heavily on noisy handcraft labels and fail to mine rich semantic information from deep feat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.09533v1-abstract-full').style.display = 'inline'; document.getElementById('2310.09533v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.09533v1-abstract-full" style="display: none;"> Unsupervised salient object detection aims to detect salient objects without using supervision signals eliminating the tedious task of manually labeling salient objects. To improve training efficiency, end-to-end methods for USOD have been proposed as a promising alternative. However, current solutions rely heavily on noisy handcraft labels and fail to mine rich semantic information from deep features. In this paper, we propose a self-supervised end-to-end salient object detection framework via top-down context. Specifically, motivated by contrastive learning, we exploit the self-localization from the deepest feature to construct the location maps which are then leveraged to learn the most instructive segmentation guidance. Further considering the lack of detailed information in deepest features, we exploit the detail-boosting refiner module to enrich the location labels with details. Moreover, we observe that due to lack of supervision, current unsupervised saliency models tend to detect non-salient objects that are salient in some other samples of corresponding scenarios. To address this widespread issue, we design a novel Unsupervised Non-Salient Suppression (UNSS) method developing the ability to ignore non-salient objects. Extensive experiments on benchmark datasets demonstrate that our method achieves leading performance among the recent end-to-end methods and most of the multi-stage solutions. The code is available. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.09533v1-abstract-full').style.display = 'none'; document.getElementById('2310.09533v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">accepted by ACM MM 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.05357">arXiv:2307.05357</a> <span> [<a href="https://arxiv.org/pdf/2307.05357">pdf</a>, <a href="https://arxiv.org/format/2307.05357">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Over-the-Air Computation in OFDM Systems with Imperfect Channel State Information </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yilong Chen</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Huijun Xing</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+J">Jie Xu</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+L">Lexi Xu</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+S">Shuguang Cui</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.05357v1-abstract-short" style="display: inline;"> This paper studies the over-the-air computation (AirComp) in an orthogonal frequency division multiplexing (OFDM) system with imperfect channel state information (CSI), in which multiple single-antenna wireless devices (WDs) simultaneously send uncoded signals to a multi-antenna access point (AP) for distributed functional computation over multiple subcarriers. In particular, we consider two scena… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.05357v1-abstract-full').style.display = 'inline'; document.getElementById('2307.05357v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.05357v1-abstract-full" style="display: none;"> This paper studies the over-the-air computation (AirComp) in an orthogonal frequency division multiplexing (OFDM) system with imperfect channel state information (CSI), in which multiple single-antenna wireless devices (WDs) simultaneously send uncoded signals to a multi-antenna access point (AP) for distributed functional computation over multiple subcarriers. In particular, we consider two scenarios with best-effort and error-constrained computation tasks, with the objectives of minimizing the average computation mean squared error (MSE) and the computation outage probability over the multiple subcarriers, respectively. Towards this end, we jointly optimize the transmit coefficients at the WDs and the receive beamforming vectors at the AP over subcarriers, subject to the maximum transmit power constraints at individual WDs. First, for the special case with a single receive antenna at the AP, we propose the semi-closed-form globally optimal solutions to the two problems using the Lagrange-duality method. 
It is shown that at each subcarrier, the WDs' optimized power control policy for average MSE minimization follows a regularized channel inversion structure, while that for computation outage probability minimization follows an on-off regularized channel inversion, with the regularization dependent on the transmit power budget and channel estimation error. Next, for the general case with multiple receive antennas at the AP, we present efficient algorithms based on alternating optimization and convex optimization to find converged solutions to both problems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.05357v1-abstract-full').style.display = 'none'; document.getElementById('2307.05357v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.14109">arXiv:2306.14109</a> <span> [<a href="https://arxiv.org/pdf/2306.14109">pdf</a>, <a href="https://arxiv.org/format/2306.14109">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> When SAM Meets Sonar Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+L">Lin Wang</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+X">Xiufen Ye</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+L">Liqiang Zhu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+W">Weijie Wu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jianguo Zhang</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Huiming Xing</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+C">Chao Hu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.14109v1-abstract-short" style="display: inline;"> Segment Anything Model (SAM) has revolutionized the way of segmentation. However, SAM's performance may decline when applied to tasks involving domains that differ from natural images. Nonetheless, by employing fine-tuning techniques, SAM exhibits promising capabilities in specific domains, such as medicine and planetary science. Notably, there is a lack of research on the application of SAM to so… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.14109v1-abstract-full').style.display = 'inline'; document.getElementById('2306.14109v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.14109v1-abstract-full" style="display: none;"> Segment Anything Model (SAM) has revolutionized the way of segmentation. However, SAM's performance may decline when applied to tasks involving domains that differ from natural images. 
Nonetheless, by employing fine-tuning techniques, SAM exhibits promising capabilities in specific domains, such as medicine and planetary science. Notably, there is a lack of research on the application of SAM to sonar imaging. In this paper, we aim to address this gap by conducting a comprehensive investigation of SAM's performance on sonar images. Specifically, we evaluate SAM using various settings on sonar images. Additionally, we fine-tune SAM using effective methods both with prompts and for semantic segmentation, thereby expanding its applicability to tasks requiring automated segmentation. Experimental results demonstrate a significant improvement in the performance of the fine-tuned SAM. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.14109v1-abstract-full').style.display = 'none'; document.getElementById('2306.14109v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 3 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.06603">arXiv:2306.06603</a> <span> [<a href="https://arxiv.org/pdf/2306.06603">pdf</a>, <a href="https://arxiv.org/ps/2306.06603">ps</a>, <a href="https://arxiv.org/format/2306.06603">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Task-Oriented Integrated Sensing, Computation and Communication for Wireless Edge AI </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hong Xing</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+G">Guangxu Zhu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+D">Dongzhu Liu</a>, <a href="/search/cs?searchtype=author&query=Wen%2C+H">Haifeng Wen</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+K">Kaibin Huang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+K">Kaishun Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.06603v1-abstract-short" style="display: inline;"> With the advent of emerging IoT applications such as autonomous driving, digital-twin and metaverse etc. featuring massive data sensing, analyzing and inference as well critical latency in beyond 5G (B5G) networks, edge artificial intelligence (AI) has been proposed to provide high-performance computation of a conventional cloud down to the network edge. 
Recently, convergence of wireless sensing,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.06603v1-abstract-full').style.display = 'inline'; document.getElementById('2306.06603v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.06603v1-abstract-full" style="display: none;"> With the advent of emerging IoT applications such as autonomous driving, digital twins and the metaverse, featuring massive data sensing, analysis and inference as well as critical latency requirements in beyond-5G (B5G) networks, edge artificial intelligence (AI) has been proposed to bring the high-performance computation of a conventional cloud down to the network edge. Recently, the convergence of wireless sensing, computation and communication (SC${}^2$) for specific edge AI tasks has driven a paradigm shift by enabling (partial) sharing of the radio-frequency (RF) transceivers and information processing pipelines among these three fundamental functionalities of IoT. However, most existing design frameworks keep these three designs separate, incurring unnecessary signaling overhead and wasted energy; it is therefore of paramount importance to advance fully integrated sensing, computation and communication (ISCC) to achieve ultra-reliable and low-latency edge intelligence acquisition. In this article, we provide an overview of the principles of enabling ISCC technologies, followed by two concrete edge AI use cases demonstrating the advantage of task-oriented ISCC, and point out some practical challenges in edge AI design with advanced ISCC solutions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.06603v1-abstract-full').style.display = 'none'; document.getElementById('2306.06603v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages, 6 figures, submitted for possible journal publication</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.11135">arXiv:2305.11135</a> <span> [<a href="https://arxiv.org/pdf/2305.11135">pdf</a>, <a href="https://arxiv.org/format/2305.11135">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Convergence Analysis of Over-the-Air FL with Compression and Power Control via Clipping </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wen%2C+H">Haifeng Wen</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hong Xing</a>, <a href="/search/cs?searchtype=author&query=Simeone%2C+O">Osvaldo Simeone</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.11135v1-abstract-short" style="display: inline;"> One of the key challenges towards the deployment of over-the-air federated learning (AirFL) is the design of mechanisms that can comply with the power and bandwidth constraints of the shared channel, while causing minimum deterioration to the learning performance as compared to baseline noiseless implementations. For additive white Gaussian noise (AWGN) channels with instantaneous per-device power… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.11135v1-abstract-full').style.display = 'inline'; document.getElementById('2305.11135v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.11135v1-abstract-full" style="display: none;"> One of the key challenges towards the deployment of over-the-air federated learning (AirFL) is the design of mechanisms that can comply with the power and bandwidth constraints of the shared channel, while causing minimum deterioration to the learning performance as compared to baseline noiseless implementations. For additive white Gaussian noise (AWGN) channels with instantaneous per-device power constraints, prior work has demonstrated the optimality of a power control mechanism based on norm clipping. This was done through the minimization of an upper bound on the optimality gap for smooth learning objectives satisfying the Polyak-Łojasiewicz (PL) condition. In this paper, we make two contributions to the development of AirFL based on norm clipping, which we refer to as AirFL-Clip. First, we provide a convergence bound for AirFL-Clip that applies to general smooth and non-convex learning objectives. Unlike existing results, the derived bound is free from run-specific parameters, thus supporting an offline evaluation. Second, we extend AirFL-Clip to include Top-k sparsification and linear compression. For this generalized protocol, referred to as AirFL-Clip-Comp, we derive a convergence bound for general smooth and non-convex learning objectives.
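As an aside for readers, a minimal NumPy sketch of the two transmit-side operations named in this abstract, norm clipping and Top-k sparsification, is given below; the function names and the threshold and k parameters are illustrative assumptions, not the paper's notation or exact protocol.

```python
import numpy as np

def clip_update(delta, clip_threshold):
    # Rescale a model update so its L2 norm never exceeds clip_threshold,
    # keeping the transmitted signal within an instantaneous power budget.
    norm = np.linalg.norm(delta)
    return delta if norm <= clip_threshold else delta * (clip_threshold / norm)

def topk_sparsify(delta, k):
    # Keep only the k largest-magnitude entries of the update (Top-k sparsification).
    out = np.zeros_like(delta)
    idx = np.argpartition(np.abs(delta), -k)[-k:]
    out[idx] = delta[idx]
    return out

# toy usage: each device would sparsify and clip its update before transmission
update = np.random.randn(1000)
tx = clip_update(topk_sparsify(update, k=100), clip_threshold=1.0)
```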
We argue, and demonstrate via experiments, that the only time-varying quantities present in the bound can be efficiently estimated offline by leveraging the well-studied properties of sparse recovery algorithms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.11135v1-abstract-full').style.display = 'none'; document.getElementById('2305.11135v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 3 figures, submitted for possible publication</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.08468">arXiv:2305.08468</a> <span> [<a href="https://arxiv.org/pdf/2305.08468">pdf</a>, <a href="https://arxiv.org/format/2305.08468">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3589785">10.1145/3589785 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> PolarDB-IMCI: A Cloud-Native HTAP Database System at Alibaba </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jianying Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+T">Tongliang Li</a>, <a href="/search/cs?searchtype=author&query=Song%2C+H">Haoze Song</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+X">Xinjun Yang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+W">Wenchao Zhou</a>, <a href="/search/cs?searchtype=author&query=Li%2C+F">Feifei Li</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+B">Baoyue Yan</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Q">Qianqian Wu</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+Y">Yukun Liang</a>, <a href="/search/cs?searchtype=author&query=Ying%2C+C">Chengjun Ying</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yujie Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+B">Baokai Chen</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+C">Chang Cai</a>, <a href="/search/cs?searchtype=author&query=Ruan%2C+Y">Yubin Ruan</a>, <a href="/search/cs?searchtype=author&query=Weng%2C+X">Xiaoyi Weng</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+S">Shibin Chen</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+L">Liang Yin</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+C">Chengzhong Yang</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+X">Xin Cai</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hongyan Xing</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+N">Nanlong Yu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xiaofei Chen</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+D">Dapeng Huang</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+J">Jianling Sun</a> </p> <p class="abstract mathjax"> <span 
class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.08468v1-abstract-short" style="display: inline;"> Cloud-native databases have become the de-facto choice for mission-critical applications on the cloud due to the need for high availability, resource elasticity, and cost efficiency. Meanwhile, driven by the increasing connectivity between data generation and analysis, users prefer a single database to efficiently process both OLTP and OLAP workloads, which enhances data freshness and reduces the… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.08468v1-abstract-full').style.display = 'inline'; document.getElementById('2305.08468v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.08468v1-abstract-full" style="display: none;"> Cloud-native databases have become the de-facto choice for mission-critical applications on the cloud due to the need for high availability, resource elasticity, and cost efficiency. Meanwhile, driven by the increasing connectivity between data generation and analysis, users prefer a single database to efficiently process both OLTP and OLAP workloads, which enhances data freshness and reduces the complexity of data synchronization and the overall business cost. In this paper, we summarize five crucial design goals for a cloud-native HTAP database based on our experience and customers' feedback, i.e., transparency, competitive OLAP performance, minimal perturbation on OLTP workloads, high data freshness, and excellent resource elasticity. As our solution to realize these goals, we present PolarDB-IMCI, a cloud-native HTAP database system designed and deployed at Alibaba Cloud. Our evaluation results show that PolarDB-IMCI is able to handle HTAP efficiently on both experimental and production workloads; notably, it speeds up analytical queries up to $\times149$ on TPC-H (100 $GB$). PolarDB-IMCI introduces low visibility delay and little performance perturbation on OLTP workloads (< 5%), and resource elasticity can be achieved by scaling out in tens of seconds. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.08468v1-abstract-full').style.display = 'none'; document.getElementById('2305.08468v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages, 16 figures, to be published in ACM SIGMOD 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.03730">arXiv:2303.03730</a> <span> [<a href="https://arxiv.org/pdf/2303.03730">pdf</a>, <a href="https://arxiv.org/format/2303.03730">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> LORE: Logical Location Regression Network for Table Structure Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hangdi Xing</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+F">Feiyu Gao</a>, <a href="/search/cs?searchtype=author&query=Long%2C+R">Rujiao Long</a>, <a href="/search/cs?searchtype=author&query=Bu%2C+J">Jiajun Bu</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Q">Qi Zheng</a>, <a href="/search/cs?searchtype=author&query=Li%2C+L">Liangcheng Li</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+C">Cong Yao</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+Z">Zhi Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.03730v1-abstract-short" style="display: inline;"> Table structure recognition (TSR) aims at extracting tables in images into machine-understandable formats. Recent methods solve this problem by predicting the adjacency relations of detected cell boxes, or learning to generate the corresponding markup sequences from the table images. However, they either count on additional heuristic rules to recover the table structures, or require a huge amount… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.03730v1-abstract-full').style.display = 'inline'; document.getElementById('2303.03730v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.03730v1-abstract-full" style="display: none;"> Table structure recognition (TSR) aims at extracting tables in images into machine-understandable formats. Recent methods solve this problem by predicting the adjacency relations of detected cell boxes, or learning to generate the corresponding markup sequences from the table images. However, they either count on additional heuristic rules to recover the table structures, or require a huge amount of training data and time-consuming sequential decoders. In this paper, we propose an alternative paradigm. We model TSR as a logical location regression problem and propose a new TSR framework called LORE, standing for LOgical location REgression network, which for the first time combines logical location regression together with spatial location regression of table cells. Our proposed LORE is conceptually simpler, easier to train and more accurate than previous TSR models of other paradigms. Experiments on standard benchmarks demonstrate that LORE consistently outperforms prior arts. Code is available at https:// github.com/AlibabaResearch/AdvancedLiterateMachinery/tree/main/DocumentUnderstanding/LORE-TSR. 
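For illustration only, the toy PyTorch head below shows the general idea the abstract describes, regressing both a spatial box and a logical location (row and column indices) for each table-cell feature; the layer sizes, names, and the use of plain linear layers are assumptions and do not reproduce the LORE architecture.

```python
import torch
import torch.nn as nn

class CellRegressionHead(nn.Module):
    """Toy head: for each cell feature vector, regress a spatial box
    (x1, y1, x2, y2) and a logical location (row_start, row_end,
    col_start, col_end)."""
    def __init__(self, feat_dim: int = 256):
        super().__init__()
        self.spatial = nn.Linear(feat_dim, 4)  # spatial location regression
        self.logical = nn.Linear(feat_dim, 4)  # logical location regression

    def forward(self, cell_feats: torch.Tensor):
        # cell_feats: (num_cells, feat_dim)
        return self.spatial(cell_feats), self.logical(cell_feats)

head = CellRegressionHead()
boxes, logical = head(torch.randn(10, 256))  # 10 candidate cells
```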
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.03730v1-abstract-full').style.display = 'none'; document.getElementById('2303.03730v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2301.13546">arXiv:2301.13546</a> <span> [<a href="https://arxiv.org/pdf/2301.13546">pdf</a>, <a href="https://arxiv.org/ps/2301.13546">ps</a>, <a href="https://arxiv.org/format/2301.13546">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Joint Task Offloading and Cache Placement for Energy-Efficient Mobile Edge Computing Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liang%2C+J">Jingxuan Liang</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hong Xing</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+F">Feng Wang</a>, <a href="/search/cs?searchtype=author&query=Lau%2C+V+K+N">Vincent K. N. Lau</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2301.13546v1-abstract-short" style="display: inline;"> This letter investigates a cache-enabled multiuser mobile edge computing (MEC) system with dynamic task arrivals, taking into account the impact of proactive cache placement on the system's overall energy consumption. We consider that an access point (AP) schedules a wireless device (WD) to offload computational tasks while executing the tasks of a finite library in the \emph{task caching} phase,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.13546v1-abstract-full').style.display = 'inline'; document.getElementById('2301.13546v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2301.13546v1-abstract-full" style="display: none;"> This letter investigates a cache-enabled multiuser mobile edge computing (MEC) system with dynamic task arrivals, taking into account the impact of proactive cache placement on the system's overall energy consumption. We consider that an access point (AP) schedules a wireless device (WD) to offload computational tasks while executing the tasks of a finite library in the \emph{task caching} phase, such that the nearby WDs with the same task request arriving later can directly download the task results in the \emph{task arrival and execution} phase. We aim for minimizing the system's weighted-sum energy over a finite-time horizon, by jointly optimizing the task caching decision and the MEC execution of the AP, and local computing as well as task offloading of the WDs at each time slot, subject to caching capacity, task causality, and completion deadline constraints. The formulated design problem is a mixed-integer nonlinear program. Under the assumption of fully predicable task arrivals, we first propose a branch-and-bound (BnB) based method to obtain the optimal offline solution. 
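As an aside, the branch-and-bound (BnB) step just mentioned can be pictured with a generic depth-first BnB over binary caching decisions; the toy cost (negative energy saving under a cache-capacity limit) and the optimistic bound below are illustrative assumptions, not the formulation used in the letter.

```python
def branch_and_bound(n, cost_of, lower_bound):
    # Depth-first search over x in {0,1}^n, pruning branches whose optimistic
    # lower bound cannot beat the best complete assignment found so far.
    best = {"cost": float("inf"), "x": None}

    def dfs(partial):
        if lower_bound(partial) >= best["cost"]:
            return  # prune: this branch cannot improve on the incumbent
        if len(partial) == n:
            best["cost"], best["x"] = cost_of(partial), partial
            return
        for b in (0, 1):
            dfs(partial + (b,))

    dfs(())
    return best["x"], best["cost"]

# toy instance: cache at most 2 of 4 tasks; caching task i saves energy[i]
energy, capacity = [5.0, 3.0, 8.0, 1.0], 2

def cost_of(x):
    return float("inf") if sum(x) > capacity else -sum(e for e, xi in zip(energy, x) if xi)

def lower_bound(x):
    # optimistic completion: pretend every undecided task can still be cached
    return cost_of(x) - sum(energy[len(x):])

print(branch_and_bound(len(energy), cost_of, lower_bound))  # ((1, 0, 1, 0), -13.0)
```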
Next, we propose two low-complexity schemes based on convex relaxation and task-popularity, respectively. Finally, numerical results show the benefit of the proposed schemes over existing benchmark schemes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.13546v1-abstract-full').style.display = 'none'; document.getElementById('2301.13546v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 3 figures, accepted for publication in WCL</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2208.02989">arXiv:2208.02989</a> <span> [<a href="https://arxiv.org/pdf/2208.02989">pdf</a>, <a href="https://arxiv.org/ps/2208.02989">ps</a>, <a href="https://arxiv.org/format/2208.02989">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Logic in Computer Science">cs.LO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Formal Languages and Automata Theory">cs.FL</span> </div> </div> <p class="title is-5 mathjax"> Covariant-Contravariant Refinement Modal $\mu$-calculus </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xing%2C+H">Huili Xing</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2208.02989v1-abstract-short" style="display: inline;"> The notion of covariant-contravariant refinement (CC-refinement, for short) is a generalization of the notions of bisimulation, simulation and refinement. This paper introduces CC-refinement modal $\mu$-calculus (CCRML$^\mu$) obtained from the modal $\mu$-calculus system K$^\mu$ by adding CC-refinement quantifiers, establishes an axiom system for CCRML$^\mu$ and explores the important properties: soundness,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.02989v1-abstract-full').style.display = 'inline'; document.getElementById('2208.02989v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2208.02989v1-abstract-full" style="display: none;"> The notion of covariant-contravariant refinement (CC-refinement, for short) is a generalization of the notions of bisimulation, simulation and refinement. This paper introduces CC-refinement modal $\mu$-calculus (CCRML$^\mu$) obtained from the modal $\mu$-calculus system K$^\mu$ by adding CC-refinement quantifiers, establishes an axiom system for CCRML$^\mu$ and explores the important properties: soundness, completeness and decidability of this axiom system. The language of CCRML$^\mu$ may be considered as a specification language for describing the properties of a system referring to reactive and generative actions. It may be used to formalize some interesting problems in the field of formal methods.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.02989v1-abstract-full').style.display = 'none'; document.getElementById('2208.02989v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2207.07795">arXiv:2207.07795</a> <span> [<a href="https://arxiv.org/pdf/2207.07795">pdf</a>, <a href="https://arxiv.org/format/2207.07795">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3503161.3548344">10.1145/3503161.3548344 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> RCRN: Real-world Character Image Restoration Network via Skeleton Extraction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shi%2C+D">Daqian Shi</a>, <a href="/search/cs?searchtype=author&query=Diao%2C+X">Xiaolei Diao</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+H">Hao Tang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiaomin Li</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hao Xing</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+H">Hao Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2207.07795v2-abstract-short" style="display: inline;"> Constructing high-quality character image datasets is challenging because real-world images are often affected by image degradation. There are limitations when applying current image restoration methods to such real-world character images, since (i) the categories of noise in character images are different from those in general images; (ii) real-world character images usually contain more complex… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.07795v2-abstract-full').style.display = 'inline'; document.getElementById('2207.07795v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2207.07795v2-abstract-full" style="display: none;"> Constructing high-quality character image datasets is challenging because real-world images are often affected by image degradation. There are limitations when applying current image restoration methods to such real-world character images, since (i) the categories of noise in character images are different from those in general images; (ii) real-world character images usually contain more complex image degradation, e.g., mixed noise at different noise levels. To address these problems, we propose a real-world character restoration network (RCRN) to effectively restore degraded character images, where character skeleton information and scale-ensemble feature extraction are utilized to obtain better restoration performance. 
The proposed method consists of a skeleton extractor (SENet) and a character image restorer (CiRNet). SENet aims to preserve the structural consistency of the character and normalize complex noise. Then, CiRNet reconstructs clean images from degraded character images and their skeletons. Due to the lack of benchmarks for real-world character image restoration, we constructed a dataset containing 1,606 character images with real-world degradation to evaluate the validity of the proposed method. The experimental results demonstrate that RCRN outperforms state-of-the-art methods quantitatively and qualitatively. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.07795v2-abstract-full').style.display = 'none'; document.getElementById('2207.07795v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ACM MM 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2207.07564">arXiv:2207.07564</a> <span> [<a href="https://arxiv.org/pdf/2207.07564">pdf</a>, <a href="https://arxiv.org/format/2207.07564">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Rethinking Attention Mechanism in Time Series Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhao%2C+B">Bowen Zhao</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Huanlai Xing</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xinhan Wang</a>, <a href="/search/cs?searchtype=author&query=Song%2C+F">Fuhong Song</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+Z">Zhiwen Xiao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2207.07564v1-abstract-short" style="display: inline;"> Attention-based models have been widely used in many areas, such as computer vision and natural language processing. However, relevant applications in time series classification (TSC) have not been explored deeply yet, causing a significant number of TSC algorithms still suffer from general problems of attention mechanism, like quadratic complexity. In this paper, we promote the efficiency and per… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.07564v1-abstract-full').style.display = 'inline'; document.getElementById('2207.07564v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2207.07564v1-abstract-full" style="display: none;"> Attention-based models have been widely used in many areas, such as computer vision and natural language processing. 
However, relevant applications in time series classification (TSC) have not been explored deeply yet, causing a significant number of TSC algorithms still suffer from general problems of attention mechanism, like quadratic complexity. In this paper, we promote the efficiency and performance of the attention mechanism by proposing our flexible multi-head linear attention (FMLA), which enhances locality awareness by layer-wise interactions with deformable convolutional blocks and online knowledge distillation. What's more, we propose a simple but effective mask mechanism that helps reduce the noise influence in time series and decrease the redundancy of the proposed FMLA by masking some positions of each given series proportionally. To stabilize this mechanism, samples are forwarded through the model with random mask layers several times and their outputs are aggregated to teach the same model with regular mask layers. We conduct extensive experiments on 85 UCR2018 datasets to compare our algorithm with 11 well-known ones and the results show that our algorithm has comparable performance in terms of top-1 accuracy. We also compare our model with three Transformer-based models with respect to the floating-point operations per second and number of parameters and find that our algorithm achieves significantly better efficiency with lower complexity. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.07564v1-abstract-full').style.display = 'none'; document.getElementById('2207.07564v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2207.07269">arXiv:2207.07269</a> <span> [<a href="https://arxiv.org/pdf/2207.07269">pdf</a>, <a href="https://arxiv.org/format/2207.07269">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Weakly Supervised Video Salient Object Detection via Point Supervision </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gao%2C+S">Shuyong Gao</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Haozhe Xing</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Wei Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yan Wang</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+Q">Qianyu Guo</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Wenqiang Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2207.07269v1-abstract-short" style="display: inline;"> Video salient object detection models trained on pixel-wise dense annotation have achieved excellent performance, yet obtaining pixel-by-pixel annotated datasets is laborious. 
Several works attempt to use scribble annotations to mitigate this problem, but point supervision as a more labor-saving annotation method (even the most labor-saving method among manual annotation methods for dense predicti… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.07269v1-abstract-full').style.display = 'inline'; document.getElementById('2207.07269v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2207.07269v1-abstract-full" style="display: none;"> Video salient object detection models trained on pixel-wise dense annotation have achieved excellent performance, yet obtaining pixel-by-pixel annotated datasets is laborious. Several works attempt to use scribble annotations to mitigate this problem, but point supervision as a more labor-saving annotation method (even the most labor-saving method among manual annotation methods for dense prediction), has not been explored. In this paper, we propose a strong baseline model based on point supervision. To infer saliency maps with temporal information, we mine inter-frame complementary information from short-term and long-term perspectives, respectively. Specifically, we propose a hybrid token attention module, which mixes optical flow and image information from orthogonal directions, adaptively highlighting critical optical flow information (channel dimension) and critical token information (spatial dimension). To exploit long-term cues, we develop the Long-term Cross-Frame Attention module (LCFA), which assists the current frame in inferring salient objects based on multi-frame tokens. Furthermore, we label two point-supervised datasets, P-DAVIS and P-DAVSOD, by relabeling the DAVIS and the DAVSOD dataset. Experiments on the six benchmark datasets illustrate our method outperforms the previous state-of-the-art weakly supervised methods and even is comparable with some fully supervised approaches. Source code and datasets are available. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.07269v1-abstract-full').style.display = 'none'; document.getElementById('2207.07269v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2022. 
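As a rough illustration of the hybrid token attention described in the abstract above (mixing optical-flow and image information along channel and spatial dimensions), here is a toy fusion module; the gating layers, names, and shapes are invented for the sketch and are not the module proposed in the paper.

```python
import torch
import torch.nn as nn

class ToyHybridAttention(nn.Module):
    """Toy fusion of image and optical-flow tokens: a channel gate reweights
    flow channels and a spatial gate reweights token positions."""
    def __init__(self, dim: int = 64):
        super().__init__()
        self.channel_gate = nn.Sequential(nn.Linear(dim, dim), nn.Sigmoid())
        self.spatial_gate = nn.Sequential(nn.Linear(dim, 1), nn.Sigmoid())

    def forward(self, image_tokens, flow_tokens):
        # image_tokens, flow_tokens: (num_tokens, dim)
        gated_flow = flow_tokens * self.channel_gate(flow_tokens.mean(dim=0, keepdim=True))
        weights = self.spatial_gate(image_tokens)      # (num_tokens, 1)
        return image_tokens + weights * gated_flow

fused = ToyHybridAttention()(torch.randn(196, 64), torch.randn(196, 64))
print(fused.shape)  # torch.Size([196, 64])
```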
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">accepted by ACM MM 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2207.06351">arXiv:2207.06351</a> <span> [<a href="https://arxiv.org/pdf/2207.06351">pdf</a>, <a href="https://arxiv.org/format/2207.06351">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.patcog.2022.108806">10.1016/j.patcog.2022.108806 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Joint Prediction of Monocular Depth and Structure using Planar and Parallax Geometry </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hao Xing</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+Y">Yifan Cao</a>, <a href="/search/cs?searchtype=author&query=Biber%2C+M">Maximilian Biber</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+M">Mingchuan Zhou</a>, <a href="/search/cs?searchtype=author&query=Burschka%2C+D">Darius Burschka</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2207.06351v1-abstract-short" style="display: inline;"> Supervised learning depth estimation methods can achieve good performance when trained on high-quality ground-truth, like LiDAR data. However, LiDAR can only generate sparse 3D maps which causes losing information. Obtaining high-quality ground-truth depth data per pixel is difficult to acquire. In order to overcome this limitation, we propose a novel approach combining structure information from… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.06351v1-abstract-full').style.display = 'inline'; document.getElementById('2207.06351v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2207.06351v1-abstract-full" style="display: none;"> Supervised learning depth estimation methods can achieve good performance when trained on high-quality ground-truth, like LiDAR data. However, LiDAR can only generate sparse 3D maps which causes losing information. Obtaining high-quality ground-truth depth data per pixel is difficult to acquire. In order to overcome this limitation, we propose a novel approach combining structure information from a promising Plane and Parallax geometry pipeline with depth information into a U-Net supervised learning network, which results in quantitative and qualitative improvement compared to existing popular learning-based methods. In particular, the model is evaluated on two large-scale and challenging datasets: KITTI Vision Benchmark and Cityscapes dataset and achieve the best performance in terms of relative error. Compared with pure depth supervision models, our model has impressive performance on depth prediction of thin objects and edges, and compared to structure prediction baseline, our model performs more robustly. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.06351v1-abstract-full').style.display = 'none'; document.getElementById('2207.06351v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Pattern Recognition, May 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2207.05493">arXiv:2207.05493</a> <span> [<a href="https://arxiv.org/pdf/2207.05493">pdf</a>, <a href="https://arxiv.org/format/2207.05493">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Skeletal Human Action Recognition using Hybrid Attention based Graph Convolutional Network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hao Xing</a>, <a href="/search/cs?searchtype=author&query=Burschka%2C+D">Darius Burschka</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2207.05493v1-abstract-short" style="display: inline;"> In skeleton-based action recognition, Graph Convolutional Networks model human skeletal joints as vertices and connect them through an adjacency matrix, which can be seen as a local attention mask. However, in most existing Graph Convolutional Networks, the local attention mask is defined based on natural connections of human skeleton joints and ignores the dynamic relations for example between he… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.05493v1-abstract-full').style.display = 'inline'; document.getElementById('2207.05493v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2207.05493v1-abstract-full" style="display: none;"> In skeleton-based action recognition, Graph Convolutional Networks model human skeletal joints as vertices and connect them through an adjacency matrix, which can be seen as a local attention mask. However, in most existing Graph Convolutional Networks, the local attention mask is defined based on natural connections of human skeleton joints and ignores the dynamic relations for example between head, hands and feet joints. In addition, the attention mechanism has been proven effective in Natural Language Processing and image description, which is rarely investigated in existing methods. In this work, we proposed a new adaptive spatial attention layer that extends local attention map to global based on relative distance and relative angle information. Moreover, we design a new initial graph adjacency matrix that connects head, hands and feet, which shows visible improvement in terms of action recognition accuracy. The proposed model is evaluated on two large-scale and challenging datasets in the field of human activities in daily life: NTU-RGB+D and Kinetics skeleton. 
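As an aside, the extra skeleton links mentioned in this abstract (connecting head, hands and feet on top of the natural bone connections) can be pictured with a small adjacency-matrix sketch; the six-joint toy skeleton, the specific extra edges, and the symmetric normalization are assumptions for illustration, not the graph used in the paper.

```python
import numpy as np

# toy skeleton: 0 head, 1 torso, 2 left hand, 3 right hand, 4 left foot, 5 right foot
natural_edges = [(0, 1), (1, 2), (1, 3), (1, 4), (1, 5)]
extra_edges = [(0, 2), (0, 3), (2, 3), (2, 4), (3, 5), (4, 5)]  # head-hands-feet links

def normalized_adjacency(num_joints, edges):
    # Symmetric adjacency with self-loops, normalized as D^{-1/2} (A + I) D^{-1/2},
    # the usual input to a graph convolution layer.
    a = np.eye(num_joints)
    for i, j in edges:
        a[i, j] = a[j, i] = 1.0
    d = np.diag(1.0 / np.sqrt(a.sum(axis=1)))
    return d @ a @ d

A = normalized_adjacency(6, natural_edges + extra_edges)
print(A.shape)  # (6, 6)
```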
The results demonstrate that our model has strong performance on both datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.05493v1-abstract-full').style.display = 'none'; document.getElementById('2207.05493v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">26th International Conference on Pattern Recognition, 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2206.14506">arXiv:2206.14506</a> <span> [<a href="https://arxiv.org/pdf/2206.14506">pdf</a>, <a href="https://arxiv.org/format/2206.14506">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> An extension of process calculus for asynchronous communications between agents with epistemic states </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xing%2C+H">Huili Xing</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2206.14506v2-abstract-short" style="display: inline;"> It plays a central role in intelligent agent systems to model an agent's epistemic state and its change. Asynchrony plays a key role in distributed systems, in which the messages transmitted may not be received instantly by the agents. To characterize asynchronous communications, asynchronous announcement logic (AAL) has been presented, which focuses on the logic laws of the change of epistemic state… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.14506v2-abstract-full').style.display = 'inline'; document.getElementById('2206.14506v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2206.14506v2-abstract-full" style="display: none;"> It plays a central role in intelligent agent systems to model an agent's epistemic state and its change. Asynchrony plays a key role in distributed systems, in which the messages transmitted may not be received instantly by the agents. To characterize asynchronous communications, asynchronous announcement logic (AAL) has been presented, which focuses on the logic laws of the change of epistemic state after receiving information. However, AAL does not involve the interactive behaviours between an agent and its environment. Through enriching the well-known pi-calculus by adding the operators for passing basic facts and applying the well-known action model logic to describe agents' epistemic states, this paper presents the e-calculus to model epistemic interactions between agents with epistemic states. The e-calculus can be adopted to characterize synchronous and asynchronous communications between agents. To capture the asynchrony, a buffer pool is constructed to store the basic facts announced and each agent reads these facts from this buffer pool in some order.
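As an aside, the buffer-pool discipline sketched in this abstract can be illustrated in a few lines of Python: announced facts are appended once, and each agent keeps its own read cursor, so reception is asynchronous while still FIFO per agent. The class and method names are invented for illustration and are not part of the e-calculus itself.

```python
from collections import defaultdict

class BufferPool:
    def __init__(self):
        self.facts = []                 # basic facts, in announcement order
        self.cursor = defaultdict(int)  # agent name -> index of next fact to read

    def announce(self, fact):
        self.facts.append(fact)

    def read(self, agent):
        # Return the next unread fact for this agent (FIFO), or None if caught up.
        i = self.cursor[agent]
        if i < len(self.facts):
            self.cursor[agent] += 1
            return self.facts[i]
        return None

pool = BufferPool()
pool.announce("p")
pool.announce("q")
assert pool.read("alice") == "p" and pool.read("alice") == "q"
assert pool.read("bob") == "p"          # bob reads later, still in sent order
```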
Based on the transmission of link names, the e-calculus is able to realize reading from this buffer pool in different orders. This paper gives two examples: one is to read in the order in which the announced basic facts are sent (First-in-first-out, FIFO), and the other is in an arbitrary order. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.14506v2-abstract-full').style.display = 'none'; document.getElementById('2206.14506v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">22 pages and 2 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.09925">arXiv:2205.09925</a> <span> [<a href="https://arxiv.org/pdf/2205.09925">pdf</a>, <a href="https://arxiv.org/format/2205.09925">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> On Jointly Optimizing Partial Offloading and SFC Mapping: A Cooperative Dual-agent Deep Reinforcement Learning Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xinhan Wang</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Huanlai Xing</a>, <a href="/search/cs?searchtype=author&query=Song%2C+F">Fuhong Song</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+S">Shouxi Luo</a>, <a href="/search/cs?searchtype=author&query=Dai%2C+P">Penglin Dai</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+B">Bowen Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.09925v1-abstract-short" style="display: inline;"> Multi-access edge computing (MEC) and network function virtualization (NFV) are promising technologies to support emerging IoT applications, especially those computation-intensive. In NFV-enabled MEC environment, service function chain (SFC), i.e., a set of ordered virtual network functions (VNFs), can be mapped on MEC servers. Mobile devices (MDs) can offload computation-intensive applications, w… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.09925v1-abstract-full').style.display = 'inline'; document.getElementById('2205.09925v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.09925v1-abstract-full" style="display: none;"> Multi-access edge computing (MEC) and network function virtualization (NFV) are promising technologies to support emerging IoT applications, especially those computation-intensive. 
In NFV-enabled MEC environment, service function chain (SFC), i.e., a set of ordered virtual network functions (VNFs), can be mapped on MEC servers. Mobile devices (MDs) can offload computation-intensive applications, which can be represented by SFCs, fully or partially to MEC servers for remote execution. This paper studies the partial offloading and SFC mapping joint optimization (POSMJO) problem in an NFV-enabled MEC system, where an incoming task can be partitioned into two parts, one for local execution and the other for remote execution. The objective is to minimize the average cost in the long term which is a combination of execution delay, MD's energy consumption, and usage charge for edge computing. This problem consists of two closely related decision-making steps, namely task partition and VNF placement, which is highly complex and quite challenging. To address this, we propose a cooperative dual-agent deep reinforcement learning (CDADRL) algorithm, where we design a framework enabling interaction between two agents. Simulation results show that the proposed algorithm outperforms three combinations of deep reinforcement learning algorithms in terms of cumulative and average episodic rewards and it overweighs a number of baseline algorithms with respect to execution delay, energy consumption, and usage charge. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.09925v1-abstract-full').style.display = 'none'; document.getElementById('2205.09925v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2202.12028">arXiv:2202.12028</a> <span> [<a href="https://arxiv.org/pdf/2202.12028">pdf</a>, <a href="https://arxiv.org/format/2202.12028">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Evolutionary Multi-Objective Reinforcement Learning Based Trajectory Control and Task Offloading in UAV-Assisted Mobile Edge Computing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Song%2C+F">Fuhong Song</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Huanlai Xing</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xinhan Wang</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+S">Shouxi Luo</a>, <a href="/search/cs?searchtype=author&query=Dai%2C+P">Penglin Dai</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+Z">Zhiwen Xiao</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+B">Bowen Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2202.12028v1-abstract-short" style="display: inline;"> This paper studies the trajectory control and task offloading (TCTO) problem in an unmanned aerial vehicle (UAV)-assisted mobile edge computing system, where a UAV flies along a planned trajectory to collect computation tasks from smart devices (SDs). 
We consider a scenario that SDs are not directly connected by the base station (BS) and the UAV has two roles to play: MEC server or wireless relay.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.12028v1-abstract-full').style.display = 'inline'; document.getElementById('2202.12028v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2202.12028v1-abstract-full" style="display: none;"> This paper studies the trajectory control and task offloading (TCTO) problem in an unmanned aerial vehicle (UAV)-assisted mobile edge computing system, where a UAV flies along a planned trajectory to collect computation tasks from smart devices (SDs). We consider a scenario that SDs are not directly connected by the base station (BS) and the UAV has two roles to play: MEC server or wireless relay. The UAV makes task offloading decisions online, in which the collected tasks can be executed locally on the UAV or offloaded to the BS for remote processing. The TCTO problem involves multi-objective optimization as its objectives are to minimize the task delay and the UAV's energy consumption, and maximize the number of tasks collected by the UAV, simultaneously. This problem is challenging because the three objectives conflict with each other. The existing reinforcement learning (RL) algorithms, either single-objective RLs or single-policy multi-objective RLs, cannot well address the problem since they cannot output multiple policies for various preferences (i.e. weights) across objectives in a single run. This paper adapts the evolutionary multi-objective RL (EMORL), a multi-policy multi-objective RL, to the TCTO problem. This algorithm can output multiple optimal policies in just one run, each optimizing a certain preference. The simulation results demonstrate that the proposed algorithm can obtain more excellent nondominated policies by striking a balance between the three objectives regarding policy quality, compared with two evolutionary and two multi-policy RL algorithms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.12028v1-abstract-full').style.display = 'none'; document.getElementById('2202.12028v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2022. 
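As background for the multi-policy idea discussed above, the snippet below shows the simplest way a preference (weight) vector turns the three conflicting objectives, task delay, UAV energy, and the negated number of collected tasks, into a single scalar that a standard RL agent could optimize; plain weighted-sum scalarization is a generic device used here for illustration and is not the EMORL algorithm itself.

```python
import numpy as np

def scalarize(objectives, preference):
    # objectives: values to be minimized, e.g. (delay, energy, -tasks_collected)
    # preference: non-negative weights summing to one
    return float(np.dot(preference, objectives))

objectives = (0.8, 1.2, -5.0)  # toy (delay, energy, -tasks_collected)
for w in [(0.6, 0.2, 0.2), (0.2, 0.6, 0.2), (1 / 3, 1 / 3, 1 / 3)]:
    # one policy per preference; a multi-policy method aims to cover
    # many such trade-offs in a single run
    print(w, scalarize(objectives, w))
```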
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2201.00011">arXiv:2201.00011</a> <span> [<a href="https://arxiv.org/pdf/2201.00011">pdf</a>, <a href="https://arxiv.org/format/2201.00011">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> An Efficient Federated Distillation Learning System for Multi-task Time Series Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xing%2C+H">Huanlai Xing</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+Z">Zhiwen Xiao</a>, <a href="/search/cs?searchtype=author&query=Qu%2C+R">Rong Qu</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Z">Zonghai Zhu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+B">Bowen Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2201.00011v1-abstract-short" style="display: inline;"> This paper proposes an efficient federated distillation learning system (EFDLS) for multi-task time series classification (TSC). EFDLS consists of a central server and multiple mobile users, where different users may run different TSC tasks. EFDLS has two novel components, namely a feature-based student-teacher (FBST) framework and a distance-based weights matching (DBWM) scheme. Within each user,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.00011v1-abstract-full').style.display = 'inline'; document.getElementById('2201.00011v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2201.00011v1-abstract-full" style="display: none;"> This paper proposes an efficient federated distillation learning system (EFDLS) for multi-task time series classification (TSC). EFDLS consists of a central server and multiple mobile users, where different users may run different TSC tasks. EFDLS has two novel components, namely a feature-based student-teacher (FBST) framework and a distance-based weights matching (DBWM) scheme. Within each user, the FBST framework transfers knowledge from its teacher's hidden layers to its student's hidden layers via knowledge distillation, with the teacher and student having identical network structure. For each connected user, its student model's hidden layers' weights are uploaded to the EFDLS server periodically. The DBWM scheme is deployed on the server, with the least square distance used to measure the similarity between the weights of two given models. This scheme finds a partner for each connected user such that the user's and its partner's weights are the closest among all the weights uploaded. The server exchanges and sends back the user's and its partner's weights to these two users which then load the received weights to their teachers' hidden layers. Experimental results show that the proposed EFDLS achieves excellent performance on a set of selected UCR2018 datasets regarding top-1 accuracy. 
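The distance-based weights matching step described in this abstract boils down to a nearest-neighbour pairing of uploaded weight vectors under squared L2 distance; a minimal sketch follows, with invented user names and random vectors standing in for the students' hidden-layer weights.

```python
import numpy as np

def match_partners(weights):
    # weights: dict user -> flattened weight vector uploaded to the server.
    # Each user is paired with the other user whose weights are closest
    # in squared L2 ("least square") distance.
    names = list(weights)
    partner = {}
    for a in names:
        others = [b for b in names if b != a]
        dists = [float(np.sum((weights[a] - weights[b]) ** 2)) for b in others]
        partner[a] = others[int(np.argmin(dists))]
    return partner

rng = np.random.default_rng(0)
users = {f"user{i}": rng.normal(size=16) for i in range(4)}
print(match_partners(users))  # the server would then exchange weights between partners
```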
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.00011v1-abstract-full').style.display = 'none'; document.getElementById('2201.00011v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2112.14893">arXiv:2112.14893</a> <span> [<a href="https://arxiv.org/pdf/2112.14893">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1039/D0CP06378A">10.1039/D0CP06378A <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Reversible Upper Confidence Bound Algorithm to Generate Diverse Optimized Candidates </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chong%2C+B">Bin Chong</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yingguang Yang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zi-Le Wang</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hang Xing</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zhirong Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2112.14893v1-abstract-short" style="display: inline;"> Most algorithms for the multi-armed bandit problem in reinforcement learning aimed to maximize the expected reward, which are thus useful in searching the optimized candidate with the highest reward (function value) for diverse applications (e.g., AlphaGo). However, in some typical application scenaios such as drug discovery, the aim is to search a diverse set of candidates with high reward. Here… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.14893v1-abstract-full').style.display = 'inline'; document.getElementById('2112.14893v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2112.14893v1-abstract-full" style="display: none;"> Most algorithms for the multi-armed bandit problem in reinforcement learning aimed to maximize the expected reward, which are thus useful in searching the optimized candidate with the highest reward (function value) for diverse applications (e.g., AlphaGo). However, in some typical application scenaios such as drug discovery, the aim is to search a diverse set of candidates with high reward. Here we propose a reversible upper confidence bound (rUCB) algorithm for such a purpose, and demonstrate its application in virtual screening upon intrinsically disordered proteins (IDPs). 
arXiv:2110.04039 (https://arxiv.org/abs/2110.04039) [pdf, other] cs.IR, cs.AI, cs.LG
Title: Global Context Enhanced Social Recommendation with Hierarchical Graph Neural Networks
Authors: Huance Xu, Chao Huang, Yong Xu, Lianghao Xia, Hao Xing, Dawei Yin
Abstract: Social recommendation aims to leverage social connections among users to enhance recommendation performance.
With the revival of deep learning techniques, many efforts have been devoted to developing neural network-based social recommender systems, such as attention mechanisms and graph-based message-passing frameworks. However, two important challenges have not been well addressed yet: (i) most existing social recommendation models fail to fully explore multi-type user-item interactive behavior as well as the underlying cross-relational inter-dependencies; and (ii) while the learned social state vector can model pair-wise user dependencies, it still has limited capacity for capturing the global social context across users. To tackle these limitations, we propose a new Social Recommendation framework with Hierarchical Graph Neural Networks (SR-HGNN). In particular, we first design a relation-aware reconstructed graph neural network to inject cross-type collaborative semantics into the recommendation framework. In addition, we augment SR-HGNN with a social relation encoder based on mutual information learning between low-level user embeddings and a high-level global representation, which endows SR-HGNN with the capability of capturing global social contextual signals. Empirical results on three public benchmarks demonstrate that SR-HGNN significantly outperforms state-of-the-art recommendation methods. Source code is available at: https://github.com/xhcdream/SR-HGNN.
Submitted 8 October, 2021; originally announced October 2021.
Comments: Published as a full paper at ICDM 2020
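SR-HGNN's architecture is not reproduced here; the sketch below only shows the generic message-passing step over a social graph (mean aggregation of neighbor embeddings) that such hierarchical GNN recommenders stack. The toy graph, mixing coefficient, and embedding size are assumptions.

```python
import numpy as np

def propagate(embeddings, neighbors):
    """One mean-aggregation message-passing step over a social graph."""
    out = np.zeros_like(embeddings)
    for u, nbrs in neighbors.items():
        msgs = embeddings[nbrs] if nbrs else embeddings[[u]]
        out[u] = 0.5 * embeddings[u] + 0.5 * msgs.mean(axis=0)
    return out

# Toy social graph over four users with 3-d embeddings.
neighbors = {0: [1, 2], 1: [0], 2: [0, 3], 3: [2]}
emb = np.random.default_rng(1).normal(size=(4, 3))
emb = propagate(emb, neighbors)
print(emb.round(3))
```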
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published as a full paper at ICDM 2020</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2110.03987">arXiv:2110.03987</a> <span> [<a href="https://arxiv.org/pdf/2110.03987">pdf</a>, <a href="https://arxiv.org/format/2110.03987">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Knowledge-aware Coupled Graph Neural Network for Social Recommendation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+C">Chao Huang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+H">Huance Xu</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Y">Yong Xu</a>, <a href="/search/cs?searchtype=author&query=Dai%2C+P">Peng Dai</a>, <a href="/search/cs?searchtype=author&query=Xia%2C+L">Lianghao Xia</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+M">Mengyin Lu</a>, <a href="/search/cs?searchtype=author&query=Bo%2C+L">Liefeng Bo</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hao Xing</a>, <a href="/search/cs?searchtype=author&query=Lai%2C+X">Xiaoping Lai</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+Y">Yanfang Ye</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2110.03987v1-abstract-short" style="display: inline;"> Social recommendation task aims to predict users' preferences over items with the incorporation of social connections among users, so as to alleviate the sparse issue of collaborative filtering. While many recent efforts show the effectiveness of neural network-based social recommender systems, several important challenges have not been well addressed yet: (i) The majority of models only consider… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.03987v1-abstract-full').style.display = 'inline'; document.getElementById('2110.03987v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2110.03987v1-abstract-full" style="display: none;"> Social recommendation task aims to predict users' preferences over items with the incorporation of social connections among users, so as to alleviate the sparse issue of collaborative filtering. While many recent efforts show the effectiveness of neural network-based social recommender systems, several important challenges have not been well addressed yet: (i) The majority of models only consider users' social connections, while ignoring the inter-dependent knowledge across items; (ii) Most of existing solutions are designed for singular type of user-item interactions, making them infeasible to capture the interaction heterogeneity; (iii) The dynamic nature of user-item interactions has been less explored in many social-aware recommendation techniques. 
To tackle the above challenges, this work proposes a Knowledge-aware Coupled Graph Neural Network (KCGN) that jointly injects the inter-dependent knowledge across items and users into the recommendation framework. KCGN enables high-order user- and item-wise relation encoding by exploiting mutual information for global graph structure awareness. Additionally, we augment KCGN with the capability of capturing dynamic multi-typed user-item interactive patterns. Experimental studies on real-world datasets show the effectiveness of our method against many strong baselines in a variety of settings. Source code is available at: https://github.com/xhcdream/KCGN.
Submitted 8 October, 2021; originally announced October 2021.
Comments: Published as a paper at AAAI 2021
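One way to picture the multi-typed interaction encoding mentioned above is to aggregate interacted-item embeddings separately per behavior type before combining them into a user representation. Everything in this sketch (type names, weights, dimensions) is an illustrative assumption, not KCGN's actual design.

```python
import numpy as np

def typed_aggregate(item_emb, interactions, type_weights):
    """Combine per-type mean aggregations of interacted-item embeddings
    into a single user representation."""
    parts = []
    for t, items in interactions.items():
        agg = item_emb[items].mean(axis=0) if items else np.zeros(item_emb.shape[1])
        parts.append(type_weights[t] * agg)
    return np.sum(parts, axis=0)

item_emb = np.random.default_rng(2).normal(size=(6, 4))
# One user's interactions split by behavior type (hypothetical types).
interactions = {"click": [0, 2, 3], "purchase": [2], "review": []}
type_weights = {"click": 0.2, "purchase": 0.6, "review": 0.2}
print(typed_aggregate(item_emb, interactions, type_weights).round(3))
```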
arXiv:2109.02376 (https://arxiv.org/abs/2109.02376) [pdf, ps, other] cs.CV, cs.HC
Title: Robust Event Detection based on Spatio-Temporal Latent Action Unit using Skeletal Information
Authors: Hao Xing, Yuxuan Xue, Mingchuan Zhou, Darius Burschka
Abstract: This paper proposes a novel dictionary learning approach to detect event actions using skeletal information extracted from RGBD video. An event action is represented as several latent atoms and composed of latent spatial and temporal attributes. We demonstrate the method on fall event detection. The skeleton frames are clustered by an initial K-means method. Each skeleton frame is assigned a varying weight parameter and fed into our Gradual Online Dictionary Learning (GODL) algorithm. During training, outlier frames are gradually filtered out by reducing their weights, which are inversely proportional to a cost. To strictly distinguish the event action from similar actions and robustly acquire its action unit, we build a latent unit temporal structure for each sub-action. We evaluate the proposed method on parts of the NTU RGB+D dataset, which includes 209 fall videos, 405 ground-lift videos, 420 sit-down videos, and 280 videos of 46 other actions. We report the achieved accuracy, recall, and precision. Our approach achieves the best precision and accuracy for human fall event detection compared with other existing dictionary learning methods, and with increasing noise ratio it maintains the highest accuracy and the lowest variance.
Submitted 1 October, 2021; v1 submitted 6 September, 2021; originally announced September 2021.
Comments: 2021 IROS
ACM Class: I.5.1; I.5.2; I.5.3
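GODL itself is not given in the abstract; the sketch below only illustrates the stated idea of down-weighting frames whose cost (distance to the nearest atom) is high when re-estimating dictionary atoms. The data, weighting rule, and iteration count are toy assumptions.

```python
import numpy as np

def weighted_update(frames, atoms, n_iter=5):
    """Re-estimate atoms as weighted centroids, where each frame's weight
    shrinks as its distance (cost) to the nearest atom grows."""
    for _ in range(n_iter):
        d = np.linalg.norm(frames[:, None, :] - atoms[None, :, :], axis=2)
        assign = d.argmin(axis=1)
        cost = d[np.arange(len(frames)), assign]
        w = 1.0 / (1.0 + cost)                  # weight inversely related to cost
        for k in range(len(atoms)):
            mask = assign == k
            if mask.any():
                atoms[k] = np.average(frames[mask], axis=0, weights=w[mask])
    return atoms

rng = np.random.default_rng(3)
frames = np.vstack([rng.normal(0, 0.3, (20, 2)), rng.normal(4, 0.3, (20, 2)),
                    rng.normal(10, 0.1, (2, 2))])   # two clusters plus outliers
atoms = frames[rng.choice(len(frames), 2, replace=False)].copy()
print(weighted_update(frames, atoms).round(2))
```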
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">2021 IROS</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.5.1; I.5.2; I.5.3 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2109.01164">arXiv:2109.01164</a> <span> [<a href="https://arxiv.org/pdf/2109.01164">pdf</a>, <a href="https://arxiv.org/format/2109.01164">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Scalable Data Annotation Pipeline for High-Quality Large Speech Datasets Development </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+M">Mingkuan Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+C">Chi Zhang</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hua Xing</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+C">Chao Feng</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+M">Monchu Chen</a>, <a href="/search/cs?searchtype=author&query=Bishop%2C+J">Judith Bishop</a>, <a href="/search/cs?searchtype=author&query=Ngapo%2C+G">Grace Ngapo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2109.01164v1-abstract-short" style="display: inline;"> This paper introduces a human-in-the-loop (HITL) data annotation pipeline to generate high-quality, large-scale speech datasets. The pipeline combines human and machine advantages to more quickly, accurately, and cost-effectively annotate datasets with machine pre-labeling and fully manual auditing. Quality control mechanisms such as blind testing, behavior monitoring, and data validation have bee… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.01164v1-abstract-full').style.display = 'inline'; document.getElementById('2109.01164v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2109.01164v1-abstract-full" style="display: none;"> This paper introduces a human-in-the-loop (HITL) data annotation pipeline to generate high-quality, large-scale speech datasets. The pipeline combines human and machine advantages to more quickly, accurately, and cost-effectively annotate datasets with machine pre-labeling and fully manual auditing. Quality control mechanisms such as blind testing, behavior monitoring, and data validation have been adopted in the annotation pipeline to mitigate potential bias introduced by machine-generated labels. Our A/B testing and pilot results demonstrated the HITL pipeline can improve annotation speed and capacity by at least 80% and quality is comparable to or higher than manual double pass annotation. We are leveraging this scalable pipeline to create and continuously grow ultra-high volume off-the-shelf (UHV-OTS) speech corpora for multiple languages, with the capability to expand to 10,000+ hours per language annually. 
Customized datasets can be produced from the UHV-OTS corpora using dynamic packaging. UHV-OTS is a long-term Appen project to support commercial and academic research data needs in speech processing. Appen will donate a number of free speech datasets from UHV-OTS each year to support academic and open-source community research under the CC-BY-SA license. We are also releasing the code of the data pre-processing and pre-tagging pipeline under the Apache 2.0 license to allow reproduction of the results reported in the paper.
Submitted 1 September, 2021; originally announced September 2021.
Comments: Submitted to NeurIPS 2021 Datasets and Benchmarks Track (Round 2)
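A toy sketch of the pre-label-then-audit flow described above: machine labels below a confidence threshold are routed to a human audit queue. The threshold, field names, and records are assumptions, not the paper's pipeline.

```python
def route_for_audit(pre_labels, threshold=0.9):
    """Split machine pre-labels into auto-accepted items and items
    queued for full manual auditing."""
    accepted, audit_queue = [], []
    for item in pre_labels:
        (accepted if item["confidence"] >= threshold else audit_queue).append(item)
    return accepted, audit_queue

pre_labels = [
    {"utterance_id": "u1", "transcript": "turn on the lights", "confidence": 0.97},
    {"utterance_id": "u2", "transcript": "play some jazz", "confidence": 0.64},
]
auto, manual = route_for_audit(pre_labels)
print(len(auto), "auto-accepted;", len(manual), "sent to human audit")
```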
arXiv:2108.04682 (https://arxiv.org/abs/2108.04682) [pdf, other] physics.chem-ph, cs.LG, q-bio.QM
Title: ChemiRise: a data-driven retrosynthesis engine
Authors: Xiangyan Sun, Ke Liu, Yuquan Lin, Lingjie Wu, Haoming Xing, Minghong Gao, Ji Liu, Suocheng Tan, Zekun Ni, Qi Han, Junqiu Wu, Jie Fan
Abstract: We have developed an end-to-end retrosynthesis system, named ChemiRise, that can propose complete retrosynthesis routes for organic compounds rapidly and reliably. The system was trained on a processed patent database of over 3 million organic reactions. Experimental reactions were atom-mapped, clustered, and extracted into reaction templates. We then trained a graph convolutional neural network-based one-step reaction proposer using template embeddings and developed a guiding algorithm on the directed acyclic graph (DAG) of chemical compounds to find the best candidate to explore. The atom-mapping algorithm and the one-step reaction proposer were benchmarked against previous studies and showed better results. The final product was demonstrated through retrosynthesis routes reviewed and rated by human experts, showing satisfying functionality and a potential productivity boost in real-life use cases.
Submitted 9 August, 2021; originally announced August 2021.
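The guiding algorithm is not specified in the abstract; the sketch below shows a generic best-first expansion over a compound graph driven by a scoring function, as a stand-in for that idea. The toy reaction table, compound names, and greedy scoring rule are assumptions.

```python
import heapq

# Hypothetical one-step "reaction proposals": product -> [(score, precursors), ...].
proposals = {
    "target": [(0.9, ["intermediate_a"]), (0.4, ["intermediate_b"])],
    "intermediate_a": [(0.8, ["buyable_1", "buyable_2"])],
    "intermediate_b": [(0.7, ["buyable_3"])],
}
buyable = {"buyable_1", "buyable_2", "buyable_3"}

def best_first_route(target):
    """Greedy best-first expansion: always expand the open compound whose
    best proposal scored highest, until everything remaining is buyable."""
    route, frontier = [], [(-1.0, target)]
    while frontier:
        _, compound = heapq.heappop(frontier)
        if compound in buyable:
            continue
        score, precursors = max(proposals[compound], key=lambda p: p[0])
        route.append((compound, precursors, score))
        for p in precursors:
            heapq.heappush(frontier, (-score, p))
    return route

print(best_first_route("target"))
```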
arXiv:2103.11220 (https://arxiv.org/abs/2103.11220) [pdf, ps, other] cs.IT, eess.SP
Title: Joint Resource Allocation and Cache Placement for Location-Aware Multi-User Mobile Edge Computing
Authors: Jiechen Chen, Hong Xing, Xiaohui Lin, Arumugam Nallanathan, Suzhi Bi
Abstract: With the growing demand for latency-critical and computation-intensive Internet of Things (IoT) services, mobile edge computing (MEC), an IoT-oriented network architecture, has emerged as a promising technique to reinforce the computation capability of resource-constrained IoT devices. To exploit cloud-like functions at the network edge, service caching has been implemented to reuse computation task input/output data, thus effectively reducing the delay incurred by data retransmission and repeated execution of the same task. In a multi-user cache-assisted MEC system, users' preferences for different types of services, which may depend on their locations, play an important role in the joint design of communication, computation, and service caching. In this paper, we consider multiple representative locations, where users at the same location share the same preference profile over a given set of services. Specifically, by exploiting the location-aware user preference profiles, we propose joint optimization of the binary cache placement, the edge computation resources, and the bandwidth allocation to minimize the expected sum energy consumption, subject to bandwidth and computation limitations as well as service latency constraints. To solve the mixed-integer non-convex problem effectively, we propose a deep learning (DL)-based offline cache placement scheme using a novel stochastic-quantization-based discrete-action generation method. The proposed hybrid learning framework benefits from both the model-free DL approach and model-based optimization. Simulations verify that the proposed DL-based scheme saves roughly 33% and 6.69% of energy consumption compared with greedy caching and popular caching, respectively, while achieving up to 99.01% of the optimal performance.
Submitted 6 August, 2022; v1 submitted 20 March, 2021; originally announced March 2021.
Comments: 32 pages, 9 figures, accepted to IEEE Internet of Things Journal
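The DL model itself is not shown here; the sketch only illustrates the stochastic-quantization idea of sampling several binary cache placements from a continuous score vector and keeping the best feasible one. The energy model, capacity, and score values are assumptions for illustration.

```python
import numpy as np

def stochastic_quantize(scores, capacity, energy_fn, n_samples=16, seed=0):
    """Sample binary placements with probabilities given by `scores`,
    discard those exceeding the cache capacity, keep the lowest-energy one."""
    rng = np.random.default_rng(seed)
    best, best_energy = None, np.inf
    for _ in range(n_samples):
        placement = (rng.random(len(scores)) < scores).astype(int)
        if placement.sum() > capacity:
            continue
        e = energy_fn(placement)
        if e < best_energy:
            best, best_energy = placement, e
    return best, best_energy

# Toy energy model: caching a service removes its (hypothetical) offloading cost.
offload_cost = np.array([5.0, 3.0, 8.0, 2.0])
energy_fn = lambda x: float(offload_cost @ (1 - x))
scores = np.array([0.7, 0.4, 0.9, 0.2])      # relaxed outputs of a DL model
print(stochastic_quantize(scores, capacity=2, energy_fn=energy_fn))
```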
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">32 pages, 9 figures, accepted to IEEE Internet of Things Journal</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2103.04162">arXiv:2103.04162</a> <span> [<a href="https://arxiv.org/pdf/2103.04162">pdf</a>, <a href="https://arxiv.org/format/2103.04162">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Molecular modeling with machine-learned universal potential functions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+K">Ke Liu</a>, <a href="/search/cs?searchtype=author&query=Ni%2C+Z">Zekun Ni</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Z">Zhenyu Zhou</a>, <a href="/search/cs?searchtype=author&query=Tan%2C+S">Suocheng Tan</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+X">Xun Zou</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+H">Haoming Xing</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+X">Xiangyan Sun</a>, <a href="/search/cs?searchtype=author&query=Han%2C+Q">Qi Han</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+J">Junqiu Wu</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+J">Jie Fan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2103.04162v2-abstract-short" style="display: inline;"> Molecular modeling is an important topic in drug discovery. Decades of research have led to the development of high quality scalable molecular force fields. In this paper, we show that neural networks can be used to train a universal approximator for energy potential functions. By incorporating a fully automated training process we have been able to train smooth, differentiable, and predictive pot… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.04162v2-abstract-full').style.display = 'inline'; document.getElementById('2103.04162v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2103.04162v2-abstract-full" style="display: none;"> Molecular modeling is an important topic in drug discovery. Decades of research have led to the development of high quality scalable molecular force fields. In this paper, we show that neural networks can be used to train a universal approximator for energy potential functions. By incorporating a fully automated training process we have been able to train smooth, differentiable, and predictive potential functions on large-scale crystal structures. A variety of tests have also been performed to show the superiority and versatility of the machine-learned model. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.04162v2-abstract-full').style.display = 'none'; document.getElementById('2103.04162v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 April, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 March, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2101.12704">arXiv:2101.12704</a> <span> [<a href="https://arxiv.org/pdf/2101.12704">pdf</a>, <a href="https://arxiv.org/ps/2101.12704">ps</a>, <a href="https://arxiv.org/format/2101.12704">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/JSAC.2021.3118400">10.1109/JSAC.2021.3118400 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Federated Learning over Wireless Device-to-Device Networks: Algorithms and Convergence Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xing%2C+H">Hong Xing</a>, <a href="/search/cs?searchtype=author&query=Simeone%2C+O">Osvaldo Simeone</a>, <a href="/search/cs?searchtype=author&query=Bi%2C+S">Suzhi Bi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2101.12704v2-abstract-short" style="display: inline;"> The proliferation of Internet-of-Things (IoT) devices and cloud-computing applications over siloed data centers is motivating renewed interest in the collaborative training of a shared model by multiple individual clients via federated learning (FL). To improve the communication efficiency of FL implementations in wireless systems, recent works have proposed compression and dimension reduction mec… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2101.12704v2-abstract-full').style.display = 'inline'; document.getElementById('2101.12704v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2101.12704v2-abstract-full" style="display: none;"> The proliferation of Internet-of-Things (IoT) devices and cloud-computing applications over siloed data centers is motivating renewed interest in the collaborative training of a shared model by multiple individual clients via federated learning (FL). 
To improve the communication efficiency of FL implementations in wireless systems, recent works have proposed compression and dimension-reduction mechanisms, along with digital and analog transmission schemes that account for channel noise, fading, and interference. The prior art has mainly focused on star topologies consisting of distributed clients and a central server. In contrast, this paper studies FL over wireless device-to-device (D2D) networks by providing theoretical insights into the performance of digital and analog implementations of decentralized stochastic gradient descent (DSGD). First, we introduce generic digital and analog wireless implementations of communication-efficient DSGD algorithms, leveraging random linear coding (RLC) for compression and over-the-air computation (AirComp) for simultaneous analog transmissions. Next, under the assumptions of convexity and connectivity, we provide convergence bounds for both implementations. The results demonstrate the dependence of the optimality gap on the connectivity and on the signal-to-noise ratio (SNR) levels in the network. The analysis is corroborated by experiments on an image-classification task.
Submitted 12 October, 2021; v1 submitted 29 January, 2021; originally announced January 2021.
Comments: 46 pages, 9 figures, to appear in IEEE J. Sel. Areas Commun.
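A minimal sketch of the decentralized SGD setting studied above: each device takes a local gradient step on its own loss and then averages its model with its D2D neighbors via a doubly-stochastic mixing matrix. The topology, quadratic losses, and step size are assumptions, and no channel noise, compression, or AirComp is modeled.

```python
import numpy as np

# Ring of 4 devices; doubly-stochastic mixing matrix (self plus two neighbors).
W = np.array([[0.5, 0.25, 0.0, 0.25],
              [0.25, 0.5, 0.25, 0.0],
              [0.0, 0.25, 0.5, 0.25],
              [0.25, 0.0, 0.25, 0.5]])
targets = np.array([1.0, 2.0, 3.0, 4.0])   # device k minimizes (x - targets[k])^2

x = np.zeros(4)                             # one scalar model per device
lr = 0.1
for step in range(200):
    grads = 2.0 * (x - targets)             # local gradients
    x = W @ (x - lr * grads)                # local SGD step, then gossip averaging
print(x.round(3))                           # devices end up near the global mean 2.5
```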
xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>