Search | arXiv e-print repository
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 327 results for author: <span class="mathjax">Du, H</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Du%2C+H">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Du, H"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Du%2C+H&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Du, H"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Du%2C+H&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Du%2C+H&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Du%2C+H&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Du%2C+H&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Du%2C+H&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Du%2C+H&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li> <a href="/search/?searchtype=author&query=Du%2C+H&start=250" class="pagination-link " aria-label="Page 6" aria-current="page">6 </a> </li> <li> <a href="/search/?searchtype=author&query=Du%2C+H&start=300" class="pagination-link " aria-label="Page 7" aria-current="page">7 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11386">arXiv:2502.11386</a> <span> [<a href="https://arxiv.org/pdf/2502.11386">pdf</a>, <a href="https://arxiv.org/format/2502.11386">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Intelligent Mobile AI-Generated Content Services via Interactive Prompt Engineering and Dynamic Service Provisioning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yinqiu Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Ruichen Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jiacheng Wang</a>, <a href="/search/cs?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xianbin Wang</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D+I">Dong In Kim</a>, <a href="/search/cs?searchtype=author&query=Du%2C+H">Hongyang Du</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11386v1-abstract-short" style="display: inline;"> Due to massive computational demands of large generative models, AI-Generated 
Content (AIGC) can organize collaborative Mobile AIGC Service Providers (MASPs) at network edges to provide ubiquitous and customized content generation for resource-constrained users. However, such a paradigm faces two significant challenges: 1) raw prompts (i.e., the task description from users) often lead to poor gene… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11386v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11386v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11386v1-abstract-full" style="display: none;"> Due to massive computational demands of large generative models, AI-Generated Content (AIGC) can organize collaborative Mobile AIGC Service Providers (MASPs) at network edges to provide ubiquitous and customized content generation for resource-constrained users. However, such a paradigm faces two significant challenges: 1) raw prompts (i.e., the task description from users) often lead to poor generation quality due to users' lack of experience with specific AIGC models, and 2) static service provisioning fails to efficiently utilize computational and communication resources given the heterogeneity of AIGC tasks. To address these challenges, we propose an intelligent mobile AIGC service scheme. Firstly, we develop an interactive prompt engineering mechanism that leverages a Large Language Model (LLM) to generate customized prompt corpora and employs Inverse Reinforcement Learning (IRL) for policy imitation through small-scale expert demonstrations. Secondly, we formulate a dynamic mobile AIGC service provisioning problem that jointly optimizes the number of inference trials and transmission power allocation. Then, we propose the Diffusion-Enhanced Deep Deterministic Policy Gradient (D3PG) algorithm to solve the problem. By incorporating the diffusion process into Deep Reinforcement Learning (DRL) architecture, the environment exploration capability can be improved, thus adapting to varying mobile AIGC scenarios. Extensive experimental results demonstrate that our prompt engineering approach improves single-round generation success probability by 6.3 times, while D3PG increases the user service experience by 67.8% compared to baseline DRL approaches. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11386v1-abstract-full').style.display = 'none'; document.getElementById('2502.11386v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.10687">arXiv:2502.10687</a> <span> [<a href="https://arxiv.org/pdf/2502.10687">pdf</a>, <a href="https://arxiv.org/format/2502.10687">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Multi-objective Aerial IRS-assisted ISAC Optimization via Generative AI-enhanced Deep Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xie%2C+W">Wenwen Xie</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+G">Geng Sun</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jiacheng Wang</a>, <a href="/search/cs?searchtype=author&query=Du%2C+H">Hongyang Du</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+J">Jiawen Kang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+K">Kaibin Huang</a>, <a href="/search/cs?searchtype=author&query=Leung%2C+V+C+M">Victor C. M. Leung</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.10687v1-abstract-short" style="display: inline;"> Integrated sensing and communication (ISAC) has garnered substantial research interest owing to its pivotal role in advancing the development of next-generation (6G) wireless networks. However, achieving a performance balance between communication and sensing in the dual-function radar communication (DFRC)-based ISAC system remains a significant challenge. In this paper, an aerial intelligent refl… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10687v1-abstract-full').style.display = 'inline'; document.getElementById('2502.10687v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.10687v1-abstract-full" style="display: none;"> Integrated sensing and communication (ISAC) has garnered substantial research interest owing to its pivotal role in advancing the development of next-generation (6G) wireless networks. However, achieving a performance balance between communication and sensing in the dual-function radar communication (DFRC)-based ISAC system remains a significant challenge. In this paper, an aerial intelligent reflecting surface (IRS)-assisted ISAC system is explored, where a base station (BS) supports dual-functional operations, enabling both data transmission for multiple users and sensing for a blocked target, with the channel quality enhanced by an IRS mounted on the unmanned aerial vehicle (UAV). Moreover, we formulate an integrated communication, sensing, and energy efficiency multi-objective optimization problem (CSEMOP), which aims to maximize the communication rate of the users and the echo rate of the target, while minimizing UAV propulsion energy consumption by jointly optimizing the BS beamforming matrix, IRS phase shifts, the flight velocity and angle of the UAV. Considering the non-convexity, trade-off, and dynamic nature of the formulated CSEMOP, we propose a generative diffusion model-based deep deterministic policy gradient (GDMDDPG) method to solve the problem. 
Specifically, the diffusion model is incorporated into the actor network of DDPG to improve the action quality, with noise perturbation mechanism for better exploration and recent prioritized experience replay (RPER) sampling mechanism for enhanced training efficiency. Simulation results indicate that the GDMDDPG method delivers superior performance compared to the existing methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10687v1-abstract-full').style.display = 'none'; document.getElementById('2502.10687v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09970">arXiv:2502.09970</a> <span> [<a href="https://arxiv.org/pdf/2502.09970">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Materials Science">cond-mat.mtrl-sci</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Universal Machine Learning Interatomic Potentials are Ready for Solid Ion Conductors </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Du%2C+H">Hongwei Du</a>, <a href="/search/cs?searchtype=author&query=Hui%2C+J">Jian Hui</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Lanting Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hong Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09970v1-abstract-short" style="display: inline;"> With the rapid development of energy storage technology, high-performance solid-state electrolytes (SSEs) have become critical for next-generation lithium-ion batteries. These materials require high ionic conductivity, excellent electrochemical stability, and good mechanical properties to meet the demands of electric vehicles and portable electronics. However, traditional methods like density func… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09970v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09970v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09970v1-abstract-full" style="display: none;"> With the rapid development of energy storage technology, high-performance solid-state electrolytes (SSEs) have become critical for next-generation lithium-ion batteries. These materials require high ionic conductivity, excellent electrochemical stability, and good mechanical properties to meet the demands of electric vehicles and portable electronics. However, traditional methods like density functional theory (DFT) and empirical force fields face challenges such as high computational costs, poor scalability, and limited accuracy across material systems. 
Universal machine learning interatomic potentials (uMLIPs) offer a promising solution with their efficiency and near-DFT-level accuracy.This study systematically evaluates six advanced uMLIP models (MatterSim, MACE, SevenNet, CHGNet, M3GNet, and ORBFF) in terms of energy, forces, thermodynamic properties, elastic moduli, and lithium-ion diffusion behavior. The results show that MatterSim outperforms others in nearly all metrics, particularly in complex material systems, demonstrating superior accuracy and physical consistency. Other models exhibit significant deviations due to issues like energy inconsistency or insufficient training data coverage.Further analysis reveals that MatterSim achieves excellent agreement with reference values in lithium-ion diffusivity calculations, especially at room temperature. Studies on Li3YCl6 and Li6PS5Cl uncover how crystal structure, anion disorder levels, and Na/Li arrangements influence ionic conductivity. Appropriate S/Cl disorder levels and optimized Na/Li arrangements enhance diffusion pathway connectivity, improving overall ionic transport performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09970v1-abstract-full').style.display = 'none'; document.getElementById('2502.09970v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09934">arXiv:2502.09934</a> <span> [<a href="https://arxiv.org/pdf/2502.09934">pdf</a>, <a href="https://arxiv.org/format/2502.09934">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Fused Partial Gromov-Wasserstein for Structured Objects </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Bai%2C+Y">Yikun Bai</a>, <a href="/search/cs?searchtype=author&query=Tran%2C+H">Huy Tran</a>, <a href="/search/cs?searchtype=author&query=Du%2C+H">Hengrong Du</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xinran Liu</a>, <a href="/search/cs?searchtype=author&query=Kolouri%2C+S">Soheil Kolouri</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09934v1-abstract-short" style="display: inline;"> Structured data, such as graphs, are vital in machine learning due to their capacity to capture complex relationships and interactions. In recent years, the Fused Gromov-Wasserstein (FGW) distance has attracted growing interest because it enables the comparison of structured data by jointly accounting for feature similarity and geometric structure. 
However, as a variant of optimal transport (OT),… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09934v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09934v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09934v1-abstract-full" style="display: none;"> Structured data, such as graphs, are vital in machine learning due to their capacity to capture complex relationships and interactions. In recent years, the Fused Gromov-Wasserstein (FGW) distance has attracted growing interest because it enables the comparison of structured data by jointly accounting for feature similarity and geometric structure. However, as a variant of optimal transport (OT), classical FGW assumes an equal mass constraint on the compared data. In this work, we relax this mass constraint and propose the Fused Partial Gromov-Wasserstein (FPGW) framework, which extends FGW to accommodate unbalanced data. Theoretically, we establish the relationship between FPGW and FGW and prove the metric properties of FPGW. Numerically, we introduce Frank-Wolfe solvers for the proposed FPGW framework and provide a convergence analysis. Finally, we evaluate the FPGW distance through graph classification and clustering experiments, demonstrating its robust performance, especially when data is corrupted by outlier noise. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09934v1-abstract-full').style.display = 'none'; document.getElementById('2502.09934v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: text overlap with arXiv:2402.03664</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06563">arXiv:2502.06563</a> <span> [<a href="https://arxiv.org/pdf/2502.06563">pdf</a>, <a href="https://arxiv.org/format/2502.06563">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Large Language Models Meet Symbolic Provers for Logical Reasoning Evaluation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qi%2C+C">Chengwen Qi</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+R">Ren Ma</a>, <a href="/search/cs?searchtype=author&query=Li%2C+B">Bowen Li</a>, <a href="/search/cs?searchtype=author&query=Du%2C+H">He Du</a>, <a href="/search/cs?searchtype=author&query=Hui%2C+B">Binyuan Hui</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+J">Jinwang Wu</a>, <a href="/search/cs?searchtype=author&query=Laili%2C+Y">Yuanjun Laili</a>, <a href="/search/cs?searchtype=author&query=He%2C+C">Conghui He</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06563v1-abstract-short" style="display: inline;"> First-order logic (FOL) reasoning, which involves sequential deduction, is pivotal for intelligent systems and serves as a valuable task for evaluating reasoning capabilities, particularly in chain-of-thought (CoT) contexts. Existing benchmarks often rely on extensive human annotation or handcrafted templates, making it difficult to achieve the necessary complexity, scalability, and diversity for… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06563v1-abstract-full').style.display = 'inline'; document.getElementById('2502.06563v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06563v1-abstract-full" style="display: none;"> First-order logic (FOL) reasoning, which involves sequential deduction, is pivotal for intelligent systems and serves as a valuable task for evaluating reasoning capabilities, particularly in chain-of-thought (CoT) contexts. Existing benchmarks often rely on extensive human annotation or handcrafted templates, making it difficult to achieve the necessary complexity, scalability, and diversity for robust evaluation. To address these limitations, we propose a novel framework called ProverGen that synergizes the generative strengths of Large Language Models (LLMs) with the rigor and precision of symbolic provers, enabling the creation of a scalable, diverse, and high-quality FOL reasoning dataset, ProverQA. ProverQA is also distinguished by its inclusion of accessible and logically coherent intermediate reasoning steps for each problem. Our evaluation shows that state-of-the-art LLMs struggle to solve ProverQA problems, even with CoT prompting, highlighting the dataset's challenging nature. We also finetune Llama3.1-8B-Instruct on a separate training set generated by our framework. 
The finetuned model demonstrates consistent improvements on both in-distribution and out-of-distribution test sets, suggesting the value of our proposed data generation framework. Code available at: https://github.com/opendatalab/ProverGen <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06563v1-abstract-full').style.display = 'none'; document.getElementById('2502.06563v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ICLR 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04988">arXiv:2502.04988</a> <span> [<a href="https://arxiv.org/pdf/2502.04988">pdf</a>, <a href="https://arxiv.org/format/2502.04988">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> CMamba: Learned Image Compression with State Space Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wu%2C+Z">Zhuojie Wu</a>, <a href="/search/cs?searchtype=author&query=Du%2C+H">Heming Du</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shuyun Wang</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+M">Ming Lu</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+H">Haiyang Sun</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+Y">Yandong Guo</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+X">Xin Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04988v1-abstract-short" style="display: inline;"> Learned Image Compression (LIC) has explored various architectures, such as Convolutional Neural Networks (CNNs) and transformers, in modeling image content distributions in order to achieve compression effectiveness. However, achieving high rate-distortion performance while maintaining low computational complexity (\ie, parameters, FLOPs, and latency) remains challenging. In this paper, we propos… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04988v1-abstract-full').style.display = 'inline'; document.getElementById('2502.04988v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04988v1-abstract-full" style="display: none;"> Learned Image Compression (LIC) has explored various architectures, such as Convolutional Neural Networks (CNNs) and transformers, in modeling image content distributions in order to achieve compression effectiveness. However, achieving high rate-distortion performance while maintaining low computational complexity (\ie, parameters, FLOPs, and latency) remains challenging. 
In this paper, we propose a hybrid Convolution and State Space Models (SSMs) based image compression framework, termed \textit{CMamba}, to achieve superior rate-distortion performance with low computational complexity. Specifically, CMamba introduces two key components: a Content-Adaptive SSM (CA-SSM) module and a Context-Aware Entropy (CAE) module. First, we observed that SSMs excel in modeling overall content but tend to lose high-frequency details. In contrast, CNNs are proficient at capturing local details. Motivated by this, we propose the CA-SSM module that can dynamically fuse global content extracted by SSM blocks and local details captured by CNN blocks in both encoding and decoding stages. As a result, important image content is well preserved during compression. Second, our proposed CAE module is designed to reduce spatial and channel redundancies in latent representations after encoding. Specifically, our CAE leverages SSMs to parameterize the spatial content in latent representations. Benefiting from SSMs, CAE significantly improves spatial compression efficiency while reducing spatial content redundancies. Moreover, along the channel dimension, CAE reduces inter-channel redundancies of latent representations via an autoregressive manner, which can fully exploit prior knowledge from previous channels without sacrificing efficiency. Experimental results demonstrate that CMamba achieves superior rate-distortion performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04988v1-abstract-full').style.display = 'none'; document.getElementById('2502.04988v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.17361">arXiv:2501.17361</a> <span> [<a href="https://arxiv.org/pdf/2501.17361">pdf</a>, <a href="https://arxiv.org/format/2501.17361">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> The M-factor: A Novel Metric for Evaluating Neural Architecture Search in Resource-Constrained Environments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Thudumu%2C+S">Srikanth Thudumu</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+H">Hy Nguyen</a>, <a href="/search/cs?searchtype=author&query=Du%2C+H">Hung Du</a>, <a href="/search/cs?searchtype=author&query=Duong%2C+N">Nhat Duong</a>, <a href="/search/cs?searchtype=author&query=Rasool%2C+Z">Zafaryab Rasool</a>, <a href="/search/cs?searchtype=author&query=Logothetis%2C+R">Rena Logothetis</a>, <a href="/search/cs?searchtype=author&query=Barnett%2C+S">Scott Barnett</a>, <a href="/search/cs?searchtype=author&query=Vasa%2C+R">Rajesh Vasa</a>, <a href="/search/cs?searchtype=author&query=Mouzakis%2C+K">Kon Mouzakis</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.17361v1-abstract-short" style="display: inline;"> Neural Architecture Search (NAS) aims to automate the design of deep neural networks. However, existing NAS techniques often focus on maximising accuracy, neglecting model efficiency. This limitation restricts their use in resource-constrained environments like mobile devices and edge computing systems. Moreover, current evaluation metrics prioritise performance over efficiency, lacking a balanced… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.17361v1-abstract-full').style.display = 'inline'; document.getElementById('2501.17361v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.17361v1-abstract-full" style="display: none;"> Neural Architecture Search (NAS) aims to automate the design of deep neural networks. However, existing NAS techniques often focus on maximising accuracy, neglecting model efficiency. This limitation restricts their use in resource-constrained environments like mobile devices and edge computing systems. Moreover, current evaluation metrics prioritise performance over efficiency, lacking a balanced approach for assessing architectures suitable for constrained scenarios. To address these challenges, this paper introduces the M-factor, a novel metric combining model accuracy and size. Four diverse NAS techniques are compared: Policy-Based Reinforcement Learning, Regularised Evolution, Tree-structured Parzen Estimator (TPE), and Multi-trial Random Search. These techniques represent different NAS paradigms, providing a comprehensive evaluation of the M-factor. The study analyses ResNet configurations on the CIFAR-10 dataset, with a search space of 19,683 configurations. 
Experiments reveal that Policy-Based Reinforcement Learning and Regularised Evolution achieved M-factor values of 0.84 and 0.82, respectively, while Multi-trial Random Search attained 0.75, and TPE reached 0.67. Policy-Based Reinforcement Learning exhibited performance changes after 39 trials, while Regularised Evolution optimised within 20 trials. The research investigates the optimisation dynamics and trade-offs between accuracy and model size for each strategy. Findings indicate that, in some cases, random search performed comparably to more complex algorithms when assessed using the M-factor. These results highlight how the M-factor addresses the limitations of existing metrics by guiding NAS towards balanced architectures, offering valuable insights for selecting strategies in scenarios requiring both performance and efficiency. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.17361v1-abstract-full').style.display = 'none'; document.getElementById('2501.17361v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.16753">arXiv:2501.16753</a> <span> [<a href="https://arxiv.org/pdf/2501.16753">pdf</a>, <a href="https://arxiv.org/format/2501.16753">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Overcoming Semantic Dilution in Transformer-Based Next Frame Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Nguyen%2C+H">Hy Nguyen</a>, <a href="/search/cs?searchtype=author&query=Thudumu%2C+S">Srikanth Thudumu</a>, <a href="/search/cs?searchtype=author&query=Du%2C+H">Hung Du</a>, <a href="/search/cs?searchtype=author&query=Vasa%2C+R">Rajesh Vasa</a>, <a href="/search/cs?searchtype=author&query=Mouzakis%2C+K">Kon Mouzakis</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.16753v1-abstract-short" style="display: inline;"> Next-frame prediction in videos is crucial for applications such as autonomous driving, object tracking, and motion prediction. The primary challenge in next-frame prediction lies in effectively capturing and processing both spatial and temporal information from previous video sequences. The transformer architecture, known for its prowess in handling sequence data, has made remarkable progress in… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16753v1-abstract-full').style.display = 'inline'; document.getElementById('2501.16753v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.16753v1-abstract-full" style="display: none;"> Next-frame prediction in videos is crucial for applications such as autonomous driving, object tracking, and motion prediction. 
The primary challenge in next-frame prediction lies in effectively capturing and processing both spatial and temporal information from previous video sequences. The transformer architecture, known for its prowess in handling sequence data, has made remarkable progress in this domain. However, transformer-based next-frame prediction models face notable issues: (a) The multi-head self-attention (MHSA) mechanism requires the input embedding to be split into $N$ chunks, where $N$ is the number of heads. Each segment captures only a fraction of the original embeddings information, which distorts the representation of the embedding in the latent space, resulting in a semantic dilution problem; (b) These models predict the embeddings of the next frames rather than the frames themselves, but the loss function based on the errors of the reconstructed frames, not the predicted embeddings -- this creates a discrepancy between the training objective and the model output. We propose a Semantic Concentration Multi-Head Self-Attention (SCMHSA) architecture, which effectively mitigates semantic dilution in transformer-based next-frame prediction. Additionally, we introduce a loss function that optimizes SCMHSA in the latent space, aligning the training objective more closely with the model output. Our method demonstrates superior performance compared to the original transformer-based predictors. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16753v1-abstract-full').style.display = 'none'; document.getElementById('2501.16753v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.15695">arXiv:2501.15695</a> <span> [<a href="https://arxiv.org/pdf/2501.15695">pdf</a>, <a href="https://arxiv.org/format/2501.15695">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Contextual Knowledge Sharing in Multi-Agent Reinforcement Learning with Decentralized Communication and Coordination </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Du%2C+H">Hung Du</a>, <a href="/search/cs?searchtype=author&query=Thudumu%2C+S">Srikanth Thudumu</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+H">Hy Nguyen</a>, <a href="/search/cs?searchtype=author&query=Vasa%2C+R">Rajesh Vasa</a>, <a href="/search/cs?searchtype=author&query=Mouzakis%2C+K">Kon Mouzakis</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.15695v1-abstract-short" style="display: inline;"> Decentralized Multi-Agent Reinforcement Learning (Dec-MARL) has emerged as a pivotal approach for addressing complex tasks in dynamic environments. Existing Multi-Agent Reinforcement Learning (MARL) methodologies typically assume a shared objective among agents and rely on centralized control. 
However, many real-world scenarios feature agents with individual goals and limited observability of othe… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15695v1-abstract-full').style.display = 'inline'; document.getElementById('2501.15695v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.15695v1-abstract-full" style="display: none;"> Decentralized Multi-Agent Reinforcement Learning (Dec-MARL) has emerged as a pivotal approach for addressing complex tasks in dynamic environments. Existing Multi-Agent Reinforcement Learning (MARL) methodologies typically assume a shared objective among agents and rely on centralized control. However, many real-world scenarios feature agents with individual goals and limited observability of other agents, complicating coordination and hindering adaptability. Existing Dec-MARL strategies prioritize either communication or coordination, lacking an integrated approach that leverages both. This paper presents a novel Dec-MARL framework that integrates peer-to-peer communication and coordination, incorporating goal-awareness and time-awareness into the agents' knowledge-sharing processes. Our framework equips agents with the ability to (i) share contextually relevant knowledge to assist other agents, and (ii) reason based on information acquired from multiple agents, while considering their own goals and the temporal context of prior knowledge. We evaluate our approach through several complex multi-agent tasks in environments with dynamically appearing obstacles. Our work demonstrates that incorporating goal-aware and time-aware knowledge sharing significantly enhances overall performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15695v1-abstract-full').style.display = 'none'; document.getElementById('2501.15695v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.15588">arXiv:2501.15588</a> <span> [<a href="https://arxiv.org/pdf/2501.15588">pdf</a>, <a href="https://arxiv.org/format/2501.15588">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Tumor Detection, Segmentation and Classification Challenge on Automated 3D Breast Ultrasound: The TDSC-ABUS Challenge </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Luo%2C+G">Gongning Luo</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+M">Mingwang Xu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+H">Hongyu Chen</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+X">Xinjie Liang</a>, <a href="/search/cs?searchtype=author&query=Tao%2C+X">Xing Tao</a>, <a href="/search/cs?searchtype=author&query=Ni%2C+D">Dong Ni</a>, <a href="/search/cs?searchtype=author&query=Jeong%2C+H">Hyunsu Jeong</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+C">Chulhong Kim</a>, <a href="/search/cs?searchtype=author&query=Stock%2C+R">Raphael Stock</a>, <a href="/search/cs?searchtype=author&query=Baumgartner%2C+M">Michael Baumgartner</a>, <a href="/search/cs?searchtype=author&query=Kirchhoff%2C+Y">Yannick Kirchhoff</a>, <a href="/search/cs?searchtype=author&query=Rokuss%2C+M">Maximilian Rokuss</a>, <a href="/search/cs?searchtype=author&query=Maier-Hein%2C+K">Klaus Maier-Hein</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Z">Zhikai Yang</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+T">Tianyu Fan</a>, <a href="/search/cs?searchtype=author&query=Boutry%2C+N">Nicolas Boutry</a>, <a href="/search/cs?searchtype=author&query=Tereshchenko%2C+D">Dmitry Tereshchenko</a>, <a href="/search/cs?searchtype=author&query=Moine%2C+A">Arthur Moine</a>, <a href="/search/cs?searchtype=author&query=Charmetant%2C+M">Maximilien Charmetant</a>, <a href="/search/cs?searchtype=author&query=Sauer%2C+J">Jan Sauer</a>, <a href="/search/cs?searchtype=author&query=Du%2C+H">Hao Du</a>, <a href="/search/cs?searchtype=author&query=Bai%2C+X">Xiang-Hui Bai</a>, <a href="/search/cs?searchtype=author&query=Raikar%2C+V+P">Vipul Pai Raikar</a>, <a href="/search/cs?searchtype=author&query=Montoya-del-Angel%2C+R">Ricardo Montoya-del-Angel</a>, <a href="/search/cs?searchtype=author&query=Marti%2C+R">Robert Marti</a> , et al. (12 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.15588v1-abstract-short" style="display: inline;"> Breast cancer is one of the most common causes of death among women worldwide. Early detection helps in reducing the number of deaths. Automated 3D Breast Ultrasound (ABUS) is a newer approach for breast screening, which has many advantages over handheld mammography such as safety, speed, and higher detection rate of breast cancer. 
Tumor detection, segmentation, and classification are key componen… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15588v1-abstract-full').style.display = 'inline'; document.getElementById('2501.15588v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.15588v1-abstract-full" style="display: none;"> Breast cancer is one of the most common causes of death among women worldwide. Early detection helps in reducing the number of deaths. Automated 3D Breast Ultrasound (ABUS) is a newer approach for breast screening, which has many advantages over handheld mammography such as safety, speed, and higher detection rate of breast cancer. Tumor detection, segmentation, and classification are key components in the analysis of medical images, especially challenging in the context of 3D ABUS due to the significant variability in tumor size and shape, unclear tumor boundaries, and a low signal-to-noise ratio. The lack of publicly accessible, well-labeled ABUS datasets further hinders the advancement of systems for breast tumor analysis. Addressing this gap, we have organized the inaugural Tumor Detection, Segmentation, and Classification Challenge on Automated 3D Breast Ultrasound 2023 (TDSC-ABUS2023). This initiative aims to spearhead research in this field and create a definitive benchmark for tasks associated with 3D ABUS image analysis. In this paper, we summarize the top-performing algorithms from the challenge and provide critical analysis for ABUS image examination. We offer the TDSC-ABUS challenge as an open-access platform at https://tdsc-abus2023.grand-challenge.org/ to benchmark and inspire future developments in algorithmic research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15588v1-abstract-full').style.display = 'none'; document.getElementById('2501.15588v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
<li class="arxiv-result">
<p class="list-title"><a href="https://arxiv.org/abs/2501.14000">arXiv:2501.14000</a> [<a href="https://arxiv.org/pdf/2501.14000">pdf</a>, <a href="https://arxiv.org/format/2501.14000">other</a>]</p>
<div class="tags">cs.LG (Machine Learning); cs.AI (Artificial Intelligence)</div>
<p class="title">Local Control Networks (LCNs): Optimizing Flexibility in Neural Network Data Pattern Capture</p>
<p class="authors">Authors: Hy Nguyen, Duy Khoa Pham, Srikanth Thudumu, Hung Du, Rajesh Vasa, Kon Mouzakis</p>
<p class="abstract">Abstract: The widespread use of Multi-layer perceptrons (MLPs) often relies on a fixed activation function (e.g., ReLU, Sigmoid, Tanh) for all nodes within the hidden layers. While effective in many scenarios, this uniformity may limit the network's ability to capture complex data patterns. We argue that employing the same activation function at every node is suboptimal and propose leveraging different activation functions at each node to increase flexibility and adaptability. To achieve this, we introduce Local Control Networks (LCNs), which leverage B-spline functions to enable distinct activation curves at each node. Our mathematical analysis demonstrates the properties and benefits of LCNs over conventional MLPs. In addition, we demonstrate that more complex architectures, such as Kolmogorov-Arnold Networks (KANs), are unnecessary in certain scenarios, and LCNs can be a more efficient alternative. Empirical experiments on various benchmarks and datasets validate our theoretical findings. In computer vision tasks, LCNs achieve marginal improvements over MLPs and outperform KANs by approximately 5%, while also being more computationally efficient than KANs. In basic machine learning tasks, LCNs show a 1% improvement over MLPs and a 0.6% improvement over KANs. For symbolic formula representation tasks, LCNs perform on par with KANs, with both architectures outperforming MLPs. Our findings suggest that diverse activations at the node level can lead to improved performance and efficiency.</p>
<p class="is-size-7">Submitted 23 January, 2025; originally announced January 2025.</p>
</li>
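<p>The per-node activation idea above can be illustrated with a small PyTorch sketch. This is only a toy illustration of giving each hidden unit its own learnable activation curve (here via a fixed Gaussian-bump basis with per-unit coefficients); it is not the paper's B-spline construction, and the module and parameter names are invented for this example.</p>
<pre><code class="language-python">
# Hypothetical sketch: a hidden layer whose every unit has its own learnable
# activation curve, expressed as a weighted sum of fixed Gaussian basis bumps.
# This only illustrates the "different activation per node" idea; the LCN paper
# uses B-spline functions, which are not reproduced here.
import torch
import torch.nn as nn

class PerNodeActivation(nn.Module):
    def __init__(self, width: int, num_basis: int = 8, grid_range: float = 3.0):
        super().__init__()
        # Fixed basis centers shared by all units, spread over [-grid_range, grid_range].
        self.register_buffer("centers", torch.linspace(-grid_range, grid_range, num_basis))
        # One coefficient vector per unit, so each unit gets a distinct curve.
        self.coeff = nn.Parameter(torch.randn(width, num_basis) * 0.1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x: (batch, width). Evaluate Gaussian bumps around each center.
        basis = torch.exp(-(x.unsqueeze(-1) - self.centers) ** 2)  # (batch, width, num_basis)
        return (basis * self.coeff).sum(dim=-1)                    # (batch, width)

layer = nn.Sequential(nn.Linear(16, 32), PerNodeActivation(32), nn.Linear(32, 1))
out = layer(torch.randn(4, 16))  # shape (4, 1)
</code></pre>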
<li class="arxiv-result">
<p class="list-title"><a href="https://arxiv.org/abs/2501.13994">arXiv:2501.13994</a> [<a href="https://arxiv.org/pdf/2501.13994">pdf</a>, <a href="https://arxiv.org/format/2501.13994">other</a>]</p>
<div class="tags">cs.CV (Computer Vision and Pattern Recognition); cs.AI (Artificial Intelligence); cs.RO (Robotics)</div>
<p class="title">CSAOT: Cooperative Multi-Agent System for Active Object Tracking</p>
<p class="authors">Authors: Hy Nguyen, Bao Pham, Hung Du, Srikanth Thudumu, Rajesh Vasa, Kon Mouzakis</p>
<p class="abstract">Abstract: Object Tracking is essential for many computer vision applications, such as autonomous navigation, surveillance, and robotics. Unlike Passive Object Tracking (POT), which relies on static camera viewpoints to detect and track objects across consecutive frames, Active Object Tracking (AOT) requires a controller agent to actively adjust its viewpoint to maintain visual contact with a moving target in complex environments. Existing AOT solutions are predominantly single-agent-based and struggle in dynamic and complex scenarios due to limited information gathering and processing capabilities, often resulting in suboptimal decision-making. Alleviating these limitations necessitates the development of a multi-agent system where different agents perform distinct roles and collaborate to enhance learning and robustness in dynamic and complex environments. Although some multi-agent approaches exist for AOT, they typically rely on external auxiliary agents, which require additional devices, making them costly. In contrast, we introduce the Collaborative System for Active Object Tracking (CSAOT), a method that leverages multi-agent deep reinforcement learning (MADRL) and a Mixture of Experts (MoE) framework to enable multiple agents to operate on a single device, thereby improving tracking performance and reducing costs. Our approach enhances robustness against occlusions and rapid motion while optimizing camera movements to extend tracking duration. We validated the effectiveness of CSAOT on various interactive maps with dynamic and stationary obstacles.</p>
<p class="is-size-7">Submitted 23 January, 2025; originally announced January 2025.</p>
</li>
<li class="arxiv-result">
<p class="list-title"><a href="https://arxiv.org/abs/2501.13992">arXiv:2501.13992</a> [<a href="https://arxiv.org/pdf/2501.13992">pdf</a>, <a href="https://arxiv.org/format/2501.13992">other</a>]</p>
<div class="tags">cs.LG (Machine Learning); cs.AI (Artificial Intelligence)</div>
<p class="title">Dual-Branch HNSW Approach with Skip Bridges and LID-Driven Optimization</p>
<p class="authors">Authors: Hy Nguyen, Nguyen Hung Nguyen, Nguyen Linh Bao Nguyen, Srikanth Thudumu, Hung Du, Rajesh Vasa, Kon Mouzakis</p>
<p class="abstract">Abstract: The Hierarchical Navigable Small World (HNSW) algorithm is widely used for approximate nearest neighbor (ANN) search, leveraging the principles of navigable small-world graphs. However, it faces some limitations. The first is the local optima problem, which arises from the algorithm's greedy search strategy, selecting neighbors based solely on proximity at each step. This often leads to cluster disconnections. The second limitation is that HNSW frequently fails to achieve logarithmic complexity, particularly in high-dimensional datasets, due to the exhaustive traversal through each layer. To address these limitations, we propose a novel algorithm that mitigates local optima and cluster disconnections while enhancing construction speed and maintaining inference speed. The first component is a dual-branch HNSW structure with LID-based insertion mechanisms, enabling traversal from multiple directions. This improves outlier node capture, enhances cluster connectivity, accelerates construction, and reduces the risk of local minima. The second component incorporates a bridge-building technique that bypasses redundant intermediate layers, maintaining inference speed and offsetting the additional computational overhead introduced by the dual-branch structure. Experiments on various benchmarks and datasets showed that our algorithm outperforms the original HNSW in both accuracy and speed. We evaluated six datasets across Computer Vision (CV) and Natural Language Processing (NLP), showing recall improvements of 18% in NLP and up to 30% in CV tasks, while reducing construction time by up to 20% and maintaining inference speed. We did not observe any trade-offs in our algorithm. Ablation studies revealed that LID-based insertion had the greatest impact on performance, followed by the dual-branch structure and bridge-building components.</p>
<p class="is-size-7">Submitted 23 January, 2025; originally announced January 2025.</p>
</li>
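<p>The LID-driven insertion mentioned above presupposes an estimate of local intrinsic dimensionality. A standard maximum-likelihood estimator of LID from k-nearest-neighbor distances is sketched below in NumPy; it illustrates LID estimation in general, not the paper's specific insertion rule, and the function name and parameters are illustrative.</p>
<pre><code class="language-python">
# Sketch of the standard MLE estimator of Local Intrinsic Dimensionality (LID)
# from k-nearest-neighbor distances:
#   LID(x) is approximately -1 / mean_i( log(r_i / r_k) ),
# where r_1, ..., r_k are the sorted distances from x to its k nearest neighbors
# (r_k the largest). How the paper maps LID values to insertion layers in its
# dual-branch HNSW is not reproduced here.
import numpy as np

def lid_mle(point: np.ndarray, data: np.ndarray, k: int = 20) -> float:
    dists = np.sort(np.linalg.norm(data - point, axis=1))
    dists = dists[dists > 1e-12][:k]   # drop the zero distance to the point itself
    r_k = dists[-1]
    return -1.0 / np.mean(np.log(dists / r_k))

rng = np.random.default_rng(0)
data = rng.normal(size=(5000, 64))
print(lid_mle(data[0], data, k=20))  # rough estimate of the local dimensionality
</code></pre>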
<li class="arxiv-result">
<p class="list-title"><a href="https://arxiv.org/abs/2501.13727">arXiv:2501.13727</a> [<a href="https://arxiv.org/pdf/2501.13727">pdf</a>, <a href="https://arxiv.org/format/2501.13727">other</a>]</p>
<div class="tags">cs.MA (Multiagent Systems); cs.AI (Artificial Intelligence)</div>
<p class="title">Scalable Safe Multi-Agent Reinforcement Learning for Multi-Agent System</p>
<p class="authors">Authors: Haikuo Du, Fandi Gou, Yunze Cai</p>
<p class="abstract">Abstract: Safety and scalability are two critical challenges faced by practical Multi-Agent Systems (MAS). However, existing Multi-Agent Reinforcement Learning (MARL) algorithms that rely solely on reward shaping are ineffective in ensuring safety, and their scalability is rather limited due to the fixed-size network output. To address these issues, we propose a novel framework, Scalable Safe MARL (SS-MARL), to enhance the safety and scalability of MARL methods. Leveraging the inherent graph structure of MAS, we design a multi-layer message passing network to aggregate local observations and communications of varying sizes. Furthermore, we develop a constrained joint policy optimization method in the setting of local observation to improve safety. Simulation experiments demonstrate that SS-MARL achieves a better trade-off between optimality and safety compared to baselines, and its scalability significantly outperforms the latest methods in scenarios with a large number of agents. The feasibility of our method is also verified by hardware implementation with Mecanum-wheeled vehicles.</p>
<p class="is-size-7">Submitted 23 January, 2025; originally announced January 2025.</p>
</li>
<li class="arxiv-result">
<p class="list-title"><a href="https://arxiv.org/abs/2501.11743">arXiv:2501.11743</a> [<a href="https://arxiv.org/pdf/2501.11743">pdf</a>, <a href="https://arxiv.org/format/2501.11743">other</a>]</p>
<div class="tags">cs.LG (Machine Learning); math.PR (Probability); stat.CO (Computation)</div>
<p class="title">Non-Reversible Langevin Algorithms for Constrained Sampling</p>
<p class="authors">Authors: Hengrong Du, Qi Feng, Changwei Tu, Xiaoyu Wang, Lingjiong Zhu</p>
<p class="abstract">Abstract: We consider the constrained sampling problem where the goal is to sample from a target distribution on a constrained domain. We propose skew-reflected non-reversible Langevin dynamics (SRNLD), a continuous-time stochastic differential equation with skew-reflected boundary. We obtain non-asymptotic convergence rates of SRNLD to the target distribution in both total variation and 1-Wasserstein distances. By breaking reversibility, we show that the convergence is faster than in the special case of the reversible dynamics. Based on the discretization of SRNLD, we propose skew-reflected non-reversible Langevin Monte Carlo (SRNLMC), and obtain a non-asymptotic discretization error bound with respect to SRNLD, as well as convergence guarantees to the target distribution in 1-Wasserstein distance. We show better performance guarantees than the projected Langevin Monte Carlo in the literature, which is based on the reversible dynamics. Numerical experiments are provided for both synthetic and real datasets to show the efficiency of the proposed algorithms.</p>
<p class="is-size-7">Submitted 20 January, 2025; originally announced January 2025.</p>
<p class="comments is-size-7">Comments: 30 pages, 9 figures</p>
</li>
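<p>For readers unfamiliar with constrained Langevin samplers, the sketch below shows a plain (reversible) reflected Langevin Monte Carlo step on a box constraint. The paper's SRNLD/SRNLMC add a non-reversible skew-symmetric drift and a skew-reflected boundary, which are not reproduced here; all names, step sizes, and the box domain are illustrative assumptions.</p>
<pre><code class="language-python">
# Sketch of a plain reflected Langevin Monte Carlo sampler on a box domain.
# Generic update:  x_{k+1} = reflect( x_k - step * grad_U(x_k) + sqrt(2*step) * noise )
# SRNLMC additionally uses a non-reversible drift and skew reflection (omitted here).
import numpy as np

def reflect_into_box(x, lo, hi):
    # Mirror coordinates that left the box back inside.
    x = np.where(x > hi, 2 * hi - x, x)
    x = np.where(lo > x, 2 * lo - x, x)
    return np.clip(x, lo, hi)

def reflected_lmc(grad_U, x0, step=1e-2, n_iter=5000, lo=-1.0, hi=1.0, seed=0):
    rng = np.random.default_rng(seed)
    x = np.array(x0, dtype=float)
    samples = []
    for _ in range(n_iter):
        noise = rng.normal(size=x.shape)
        x = x - step * grad_U(x) + np.sqrt(2 * step) * noise
        x = reflect_into_box(x, lo, hi)
        samples.append(x.copy())
    return np.array(samples)

# Example: sample from a standard Gaussian restricted to the box [-1, 1]^2.
samples = reflected_lmc(grad_U=lambda x: x, x0=np.zeros(2))
</code></pre>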
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.11743v1-abstract-full').style.display = 'none'; document.getElementById('2501.11743v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">30 pages, 9 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.09532">arXiv:2501.09532</a> <span> [<a href="https://arxiv.org/pdf/2501.09532">pdf</a>, <a href="https://arxiv.org/format/2501.09532">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> AdaFV: Rethinking of Visual-Language alignment for VLM acceleration </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Han%2C+J">Jiayi Han</a>, <a href="/search/cs?searchtype=author&query=Du%2C+L">Liang Du</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yiwen Wu</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+X">Xiangguo Zhou</a>, <a href="/search/cs?searchtype=author&query=Du%2C+H">Hongwei Du</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+W">Weibo Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.09532v2-abstract-short" style="display: inline;"> The success of VLMs often relies on the dynamic high-resolution schema that adaptively augments the input images to multiple crops, so that the details of the images can be retained. However, such approaches result in a large number of redundant visual tokens, thus significantly reducing the efficiency of the VLMs. To improve the VLMs' efficiency without introducing extra training costs, many rese… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.09532v2-abstract-full').style.display = 'inline'; document.getElementById('2501.09532v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.09532v2-abstract-full" style="display: none;"> The success of VLMs often relies on the dynamic high-resolution schema that adaptively augments the input images to multiple crops, so that the details of the images can be retained. However, such approaches result in a large number of redundant visual tokens, thus significantly reducing the efficiency of the VLMs. To improve the VLMs' efficiency without introducing extra training costs, many research works are proposed to reduce the visual tokens by filtering the uninformative visual tokens or aggregating their information. Some approaches propose to reduce the visual tokens according to the self-attention of VLMs, which are biased, to result in inaccurate responses. 
The token reduction approaches solely rely on visual cues are text-agnostic, and fail to focus on the areas that are most relevant to the question, especially when the queried objects are non-salient to the image. In this work, we first conduct experiments to show that the original text embeddings are aligned with the visual tokens, without bias on the tailed visual tokens. We then propose a self-adaptive cross-modality attention mixture mechanism that dynamically leverages the effectiveness of visual saliency and text-to-image similarity in the pre-LLM layers to select the visual tokens that are informative. Extensive experiments demonstrate that the proposed approach achieves state-of-the-art training-free VLM acceleration performance, especially when the reduction rate is sufficiently large. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.09532v2-abstract-full').style.display = 'none'; document.getElementById('2501.09532v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages, 9 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.09391">arXiv:2501.09391</a> <span> [<a href="https://arxiv.org/pdf/2501.09391">pdf</a>, <a href="https://arxiv.org/format/2501.09391">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Contract-Inspired Contest Theory for Controllable Image Generation in Mobile Edge Metaverse </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+G">Guangyuan Liu</a>, <a href="/search/cs?searchtype=author&query=Du%2C+H">Hongyang Du</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jiacheng Wang</a>, <a href="/search/cs?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D+I">Dong In Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.09391v1-abstract-short" style="display: inline;"> The rapid advancement of immersive technologies has propelled the development of the Metaverse, where the convergence of virtual and physical realities necessitates the generation of high-quality, photorealistic images to enhance user experience. 
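<p>The idea of mixing text-to-image similarity with visual saliency to prune visual tokens can be sketched in a few lines of PyTorch. The fixed 50/50 mixing weight and the top-k selection below are simplifications for illustration; they are not the paper's self-adaptive cross-modality attention mixture, and all tensor names and sizes are assumed.</p>
<pre><code class="language-python">
# Toy sketch of text-guided visual token pruning: keep the top-k visual tokens
# ranked by a mix of (a) similarity to the text embedding and (b) a visual
# saliency score. The fixed 0.5/0.5 mix is purely illustrative.
import torch
import torch.nn.functional as F

def select_visual_tokens(visual_tokens, text_embedding, saliency, keep: int):
    # visual_tokens: (n, d); text_embedding: (d,); saliency: (n,), normalized
    text_sim = F.cosine_similarity(visual_tokens, text_embedding.unsqueeze(0), dim=-1)
    score = 0.5 * text_sim + 0.5 * saliency
    keep_idx = score.topk(keep).indices.sort().values   # keep original token order
    return visual_tokens[keep_idx], keep_idx

tokens = torch.randn(576, 1024)   # e.g. ViT patch tokens (assumed)
text = torch.randn(1024)          # pooled text embedding (assumed aligned)
saliency = torch.rand(576)        # e.g. attention-derived saliency (assumed)
pruned, idx = select_visual_tokens(tokens, text, saliency, keep=144)
</code></pre>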
<li class="arxiv-result">
<p class="list-title"><a href="https://arxiv.org/abs/2501.09391">arXiv:2501.09391</a> [<a href="https://arxiv.org/pdf/2501.09391">pdf</a>, <a href="https://arxiv.org/format/2501.09391">other</a>]</p>
<div class="tags">cs.NI (Networking and Internet Architecture)</div>
<p class="title">Contract-Inspired Contest Theory for Controllable Image Generation in Mobile Edge Metaverse</p>
<p class="authors">Authors: Guangyuan Liu, Hongyang Du, Jiacheng Wang, Dusit Niyato, Dong In Kim</p>
<p class="abstract">Abstract: The rapid advancement of immersive technologies has propelled the development of the Metaverse, where the convergence of virtual and physical realities necessitates the generation of high-quality, photorealistic images to enhance user experience. However, generating these images, especially through Generative Diffusion Models (GDMs), in mobile edge computing environments presents significant challenges due to the limited computing resources of edge devices and the dynamic nature of wireless networks. This paper proposes a novel framework that integrates contract-inspired contest theory, Deep Reinforcement Learning (DRL), and GDMs to optimize image generation in these resource-constrained environments. The framework addresses the critical challenges of resource allocation and semantic data transmission quality by incentivizing edge devices to efficiently transmit high-quality semantic data, which is essential for creating realistic and immersive images. The use of contest and contract theory ensures that edge devices are motivated to allocate resources effectively, while DRL dynamically adjusts to network conditions, optimizing the overall image generation process. Experimental results demonstrate that the proposed approach not only improves the quality of generated images but also achieves superior convergence speed and stability compared to traditional methods. This makes the framework particularly effective for optimizing complex resource allocation tasks in mobile edge Metaverse applications, offering enhanced performance and efficiency in creating immersive virtual environments.</p>
<p class="is-size-7">Submitted 16 January, 2025; originally announced January 2025.</p>
<p class="comments is-size-7">Comments: 16 pages, 10 figures</p>
</li>
<li class="arxiv-result">
<p class="list-title"><a href="https://arxiv.org/abs/2501.09383">arXiv:2501.09383</a> [<a href="https://arxiv.org/pdf/2501.09383">pdf</a>, <a href="https://arxiv.org/format/2501.09383">other</a>]</p>
<div class="tags">cs.NI (Networking and Internet Architecture)</div>
<p class="title">Adaptive Contextual Caching for Mobile Edge Large Language Model Service</p>
<p class="authors">Authors: Guangyuan Liu, Yinqiu Liu, Jiacheng Wang, Hongyang Du, Dusit Niyato, Jiawen Kang, Zehui Xiong</p>
<p class="abstract">Abstract: Mobile edge Large Language Model (LLM) deployments face inherent constraints, such as limited computational resources and network bandwidth. Although Retrieval-Augmented Generation (RAG) mitigates some challenges by integrating external knowledge bases, inefficient cache management can still result in high retrieval latency and frequent cache updates. To address these issues, we propose an Adaptive Contextual Caching (ACC) framework that anticipates user needs by proactively caching semantically relevant data for mobile-edge LLMs. ACC utilizes a deep reinforcement learning (DRL) module to refine cache replacement policies, balancing user context, document similarity, and the overhead associated with cache misses. Experimental results demonstrate that ACC increases cache hit rates to over 80% after only 11 training episodes, outperforming FIFO, LRU, and semantic-only caching while reducing retrieval latency by up to 40%. In particular, ACC also reduces local caching overhead (i.e., the cost of updating the cache when a miss occurs) by as much as 55%, enabling scalable, low-latency LLM services in resource-constrained edge environments.</p>
<p class="is-size-7">Submitted 16 January, 2025; originally announced January 2025.</p>
<p class="comments is-size-7">Comments: 8 pages, 5 figures</p>
</li>
<li class="arxiv-result">
<p class="list-title"><a href="https://arxiv.org/abs/2501.05415">arXiv:2501.05415</a> [<a href="https://arxiv.org/pdf/2501.05415">pdf</a>, <a href="https://arxiv.org/format/2501.05415">other</a>]</p>
<div class="tags">cs.LG (Machine Learning)</div>
<p class="title">Uncertainty-aware Knowledge Tracing</p>
<p class="authors">Authors: Weihua Cheng, Hanwen Du, Chunxiao Li, Ersheng Ni, Liangdi Tan, Tianqi Xu, Yongxin Ni</p>
<p class="abstract">Abstract: Knowledge Tracing (KT) is crucial in education assessment, focusing on depicting students' learning states and assessing students' mastery of subjects. With the rise of modern online learning platforms, particularly massive open online courses (MOOCs), an abundance of interaction data has greatly advanced the development of KT technology. Previous research commonly adopts deterministic representations to capture students' knowledge states, which neglects the uncertainty during student interactions and thus fails to model the true knowledge state in the learning process. In light of this, we propose an Uncertainty-Aware Knowledge Tracing model (UKT), which employs stochastic distribution embeddings to represent the uncertainty in student interactions, with a Wasserstein self-attention mechanism designed to capture the transition of state distributions in student learning behaviors. Additionally, we introduce an aleatory uncertainty-aware contrastive learning loss, which strengthens the model's robustness towards different types of uncertainties. Extensive experiments on six real-world datasets demonstrate that UKT not only significantly surpasses existing deep learning-based models in KT prediction, but also shows unique advantages in handling the uncertainty of student interactions.</p>
<p class="is-size-7">Submitted 21 January, 2025; v1 submitted 9 January, 2025; originally announced January 2025.</p>
<p class="comments is-size-7">Comments: Accepted by AAAI 2025</p>
</li>
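<p>The Wasserstein self-attention mentioned above builds on the closed form of the 2-Wasserstein distance between Gaussians. The snippet below shows only that closed form for diagonal Gaussians and a similarity logit derived from it, not UKT's full attention mechanism; all names are illustrative.</p>
<pre><code class="language-python">
# The 2-Wasserstein distance between diagonal Gaussians N(mu1, diag(s1^2)) and
# N(mu2, diag(s2^2)) has the closed form
#   W2^2 = ||mu1 - mu2||^2 + ||s1 - s2||^2 .
# A stochastic-embedding attention logit can then be built from -W2^2; UKT's
# actual Wasserstein self-attention is more elaborate and not reproduced here.
import numpy as np

def w2_squared(mu1, s1, mu2, s2):
    mu1, s1, mu2, s2 = map(np.asarray, (mu1, s1, mu2, s2))
    return float(np.sum((mu1 - mu2) ** 2) + np.sum((s1 - s2) ** 2))

def w2_attention_logit(mu_q, s_q, mu_k, s_k, temperature: float = 1.0):
    # Closer distributions (smaller W2) get larger logits.
    return -w2_squared(mu_q, s_q, mu_k, s_k) / temperature

print(w2_attention_logit([0.0, 0.0], [1.0, 1.0], [0.5, -0.5], [1.2, 0.9]))
</code></pre>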
<li class="arxiv-result">
<p class="list-title"><a href="https://arxiv.org/abs/2501.05040">arXiv:2501.05040</a> [<a href="https://arxiv.org/pdf/2501.05040">pdf</a>, <a href="https://arxiv.org/format/2501.05040">other</a>]</p>
<div class="tags">cs.CL (Computation and Language)</div>
<p class="title">SWE-Fixer: Training Open-Source LLMs for Effective and Efficient GitHub Issue Resolution</p>
<p class="authors">Authors: Chengxing Xie, Bowen Li, Chang Gao, He Du, Wai Lam, Difan Zou, Kai Chen</p>
<p class="abstract">Abstract: Large Language Models (LLMs) have demonstrated remarkable proficiency across a variety of complex tasks. One significant application of LLMs is in tackling software engineering challenges, particularly in resolving real-world tasks on GitHub by fixing code based on the issues reported by the users. However, many current approaches rely on proprietary LLMs, which limits reproducibility, accessibility, and transparency. The critical components of LLMs for addressing software engineering issues and how their capabilities can be effectively enhanced remain unclear. To address these challenges, we introduce SWE-Fixer, a novel open-source framework designed to effectively and efficiently resolve GitHub issues. SWE-Fixer comprises two essential modules: a code file retrieval module and a code editing module. The retrieval module employs BM25 along with a lightweight model to achieve coarse-to-fine file retrieval. Subsequently, the code editing module utilizes the other model to generate patches for the identified files. To mitigate the lack of publicly available datasets, we compile an extensive dataset that includes 110K GitHub issues along with their corresponding patches and train the two models of SWE-Fixer separately. We assess our approach on the SWE-Bench Lite and Verified benchmarks, achieving state-of-the-art performance among open-source models with scores of 24.7% and 32.8%, respectively. Additionally, our approach requires only two model calls per instance, making it significantly more efficient than existing methods. These results highlight the effectiveness of SWE-Fixer in real-world code-fixing scenarios. We will make our model, dataset, and code publicly available at https://github.com/InternLM/SWE-Fixer.</p>
<p class="is-size-7">Submitted 20 February, 2025; v1 submitted 9 January, 2025; originally announced January 2025.</p>
<p class="comments is-size-7">Comments: Our code, data, and model will be released at https://github.com/InternLM/SWE-Fixer</p>
</li>
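<p>The coarse stage of SWE-Fixer's retrieval module is BM25 over repository files. A minimal version of such a coarse retrieval step is sketched below using the third-party rank_bm25 package as a stand-in (the abstract does not name a particular BM25 implementation); the tokenization and example data are assumptions, and the lightweight re-ranking and code-editing models are omitted.</p>
<pre><code class="language-python">
# Minimal sketch of BM25-based coarse file retrieval for an issue report,
# using the rank_bm25 package (pip install rank-bm25) as an illustrative
# implementation. SWE-Fixer additionally re-ranks with a lightweight model and
# edits the retrieved files with a second model; only this coarse step is shown.
from rank_bm25 import BM25Okapi

def coarse_retrieve(issue_text: str, file_paths, file_contents, top_k: int = 5):
    # Naive whitespace tokenization; a real pipeline would tokenize code-aware.
    corpus_tokens = [text.lower().split() for text in file_contents]
    bm25 = BM25Okapi(corpus_tokens)
    scores = bm25.get_scores(issue_text.lower().split())
    ranked = sorted(zip(file_paths, scores), key=lambda p: p[1], reverse=True)
    return ranked[:top_k]

files = ["src/auth.py", "src/db.py", "README.md"]           # hypothetical repo files
contents = ["def login(user): ...", "def connect(dsn): ...", "project readme"]
print(coarse_retrieve("login fails with TypeError in auth", files, contents, top_k=2))
</code></pre>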
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Our code, data, and model will be released at https://github.com/InternLM/SWE-Fixer</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.03837">arXiv:2501.03837</a> <span> [<a href="https://arxiv.org/pdf/2501.03837">pdf</a>, <a href="https://arxiv.org/ps/2501.03837">ps</a>, <a href="https://arxiv.org/format/2501.03837">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Symbolic Computation">cs.SC</span> </div> </div> <p class="title is-5 mathjax"> A Unification of Zeilberger's Algorithm and Its q-Analogue </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+S">Shaoshi Chen</a>, <a href="/search/cs?searchtype=author&query=Du%2C+H">Hao Du</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+Y">Yiman Gao</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+H">Hui Huang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Ziming Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.03837v1-abstract-short" style="display: inline;"> We adapt the theory of normal and special polynomials from symbolic integration to the summation setting, and then built up a general framework embracing both the usual shift case and the q-shift case. In the context of this general framework, we develop a unified reduction algorithm, and subsequently a creative telescoping algorithm, applicable to both hypergeometric terms and their q-analogues.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.03837v1-abstract-full').style.display = 'inline'; document.getElementById('2501.03837v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.03837v1-abstract-full" style="display: none;"> We adapt the theory of normal and special polynomials from symbolic integration to the summation setting, and then built up a general framework embracing both the usual shift case and the q-shift case. In the context of this general framework, we develop a unified reduction algorithm, and subsequently a creative telescoping algorithm, applicable to both hypergeometric terms and their q-analogues. Our algorithms allow to split up the usual shift case and the q-shift case only when it is really necessary, and thus instantly reveal the intrinsic differences between these two cases. Computational experiments are also provided. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.03837v1-abstract-full').style.display = 'none'; document.getElementById('2501.03837v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
<li class="arxiv-result">
<p class="list-title"><a href="https://arxiv.org/abs/2501.03278">arXiv:2501.03278</a> [<a href="https://arxiv.org/pdf/2501.03278">pdf</a>] doi: <a href="https://doi.org/10.1038/s41524-024-01444-x">10.1038/s41524-024-01444-x</a></p>
<div class="tags">cond-mat.mtrl-sci (Materials Science); cs.LG (Machine Learning)</div>
<p class="title">DenseGNN: universal and scalable deeper graph neural networks for high-performance property prediction in crystals and molecules</p>
<p class="authors">Authors: Hongwei Du, Jiamin Wang, Jian Hui, Lanting Zhang, Hong Wang</p>
<p class="abstract">Abstract: Generative models generate vast numbers of hypothetical materials, necessitating fast, accurate models for property prediction. Graph Neural Networks (GNNs) excel in this domain but face challenges like high training costs, domain adaptation issues, and over-smoothing. We introduce DenseGNN, which employs Dense Connectivity Network (DCN), Hierarchical Node-Edge-Graph Residual Networks (HRN), and Local Structure Order Parameters Embedding (LOPE) to address these challenges. DenseGNN achieves state-of-the-art performance on datasets such as JARVIS-DFT, Materials Project, and QM9, improving the performance of models like GIN, Schnet, and Hamnet on materials datasets. By optimizing atomic embeddings and reducing computational costs, DenseGNN enables deeper architectures and surpasses other GNNs in crystal structure distinction, approaching X-ray diffraction method accuracy. This advances materials discovery and design.</p>
<p class="is-size-7">Submitted 5 January, 2025; originally announced January 2025.</p>
<p class="comments is-size-7">Comments: DenseGNN optimizes computational efficiency and accuracy in predicting material properties using DCN, HRN, and LOPE. It enhances transferability and overcomes over-smoothing, enabling deep architectures. Performance improvements on JARVIS-DFT, Materials Project, and QM9 datasets advance materials discovery and design</p>
<p class="comments is-size-7">Journal ref: npj Comput Mater 10, 292 (2024)</p>
</li>
<li class="arxiv-result">
<p class="list-title"><a href="https://arxiv.org/abs/2501.02952">arXiv:2501.02952</a> [<a href="https://arxiv.org/pdf/2501.02952">pdf</a>, <a href="https://arxiv.org/format/2501.02952">other</a>]</p>
<div class="tags">cs.NI (Networking and Internet Architecture); eess.SP (Signal Processing)</div>
<p class="title">Online Collaborative Resource Allocation and Task Offloading for Multi-access Edge Computing</p>
<p class="authors">Authors: Geng Sun, Minghua Yuan, Zemin Sun, Jiacheng Wang, Hongyang Du, Dusit Niyato, Zhu Han, Dong In Kim</p>
<p class="abstract">Abstract: Multi-access edge computing (MEC) is emerging as a promising paradigm to provide flexible computing services close to user devices (UDs). However, meeting the computation-hungry and delay-sensitive demands of UDs faces several challenges, including the resource constraints of MEC servers, inherent dynamic and complex features in the MEC system, and difficulty in dealing with the time-coupled and decision-coupled optimization. In this work, we first present an edge-cloud collaborative MEC architecture, where the MEC servers and cloud collaboratively provide offloading services for UDs. Moreover, we formulate an energy-efficient and delay-aware optimization problem (EEDAOP) to minimize the energy consumption of UDs under the constraints of task deadlines and long-term queuing delays. Since the problem is proved to be a non-convex mixed-integer nonlinear program (MINLP), we propose an online joint communication resource allocation and task offloading approach (OJCTA). Specifically, we transform EEDAOP into a real-time optimization problem by employing the Lyapunov optimization framework. Then, to solve the real-time optimization problem, we propose a communication resource allocation and task offloading optimization method by employing the Tammer decomposition mechanism, convex optimization method, bilateral matching mechanism, and dependent rounding method. Simulation results demonstrate that the proposed OJCTA can achieve superior system performance compared to the benchmark approaches.</p>
<p class="is-size-7">Submitted 6 January, 2025; originally announced January 2025.</p>
</li>
<li class="arxiv-result">
<p class="list-title"><a href="https://arxiv.org/abs/2501.02787">arXiv:2501.02787</a> [<a href="https://arxiv.org/pdf/2501.02787">pdf</a>, <a href="https://arxiv.org/format/2501.02787">other</a>]</p>
<div class="tags">cs.NI (Networking and Internet Architecture)</div>
<p class="title">Joint Optimization of UAV-Carried IRS for Urban Low Altitude mmWave Communications with Deep Reinforcement Learning</p>
<p class="authors">Authors: Wenwen Xie, Geng Sun, Bei Liu, Jiahui Li, Jiacheng Wang, Hongyang Du, Dusit Niyato, Dong In Kim</p>
<p class="abstract">Abstract: Emerging technologies in the sixth generation (6G) of wireless communications, such as terahertz communication and ultra-massive multiple-input multiple-output, present promising prospects. Despite their high data rate potential, millimeter wave (mmWave) communications in urban low altitude economy (LAE) environments are constrained by challenges such as signal attenuation and multipath interference. Specifically, in urban environments, mmWave communication experiences significant attenuation due to buildings, owing to its short wavelength, which necessitates developing innovative approaches to improve the robustness of such communications in LAE networking. In this paper, we explore the use of an unmanned aerial vehicle (UAV)-carried intelligent reflecting surface (IRS) to support low altitude mmWave communication. Specifically, we consider a typical urban low altitude communication scenario where a UAV-carried IRS establishes a line-of-sight (LoS) channel between the mobile users and a source user (SU) despite the presence of obstacles. Subsequently, we formulate an optimization problem aimed at maximizing the transmission rates and minimizing the energy consumption of the UAV by jointly optimizing the phase shifts of the IRS and the UAV trajectory. Given the non-convex nature of the problem and its high dynamics, we propose a deep reinforcement learning-based approach incorporating neural episodic control, long short-term memory, and an IRS phase shift control method to enhance stability and accelerate convergence. Simulation results show that the proposed algorithm effectively resolves the problem and surpasses other benchmark algorithms across various performance metrics.</p>
<p class="is-size-7">Submitted 6 January, 2025; originally announced January 2025.</p>
</li>
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.02350">arXiv:2501.02350</a> <span> [<a href="https://arxiv.org/pdf/2501.02350">pdf</a>, <a href="https://arxiv.org/format/2501.02350">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> PM-Dedup: Secure Deduplication with Partial Migration from Cloud to Edge Servers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ke%2C+Z">Zhaokang Ke</a>, <a href="/search/cs?searchtype=author&query=Gong%2C+H">Haoyu Gong</a>, <a href="/search/cs?searchtype=author&query=Du%2C+D+H+C">David H. C. Du</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.02350v1-abstract-short" style="display: inline;"> Currently, an increasing number of users and enterprises are storing their data in the cloud but do not fully trust cloud providers with their data in plaintext form. To address this concern, they encrypt their data before uploading it to the cloud. However, encryption with different keys means that even identical data will become different ciphertexts, making deduplication less effective. Encrypt… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02350v1-abstract-full').style.display = 'inline'; document.getElementById('2501.02350v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.02350v1-abstract-full" style="display: none;"> Currently, an increasing number of users and enterprises are storing their data in the cloud but do not fully trust cloud providers with their data in plaintext form. To address this concern, they encrypt their data before uploading it to the cloud. However, encryption with different keys means that even identical data will become different ciphertexts, making deduplication less effective. Encrypted deduplication avoids this issue by ensuring that identical data chunks generate the same ciphertext with content-based keys, enabling the cloud to efficiently identify and remove duplicates even in encrypted form. Current encrypted data deduplication work can be classified into two types: target-based and source-based. Target-based encrypted deduplication requires clients to upload all encrypted chunks (the basic unit of deduplication) to the cloud with high network bandwidth overhead. Source-based deduplication involves clients uploading fingerprints (hashes) of encrypted chunks for duplicate checking and only uploading unique encrypted chunks, which reduces network transfer but introduces high latency and potential side-channel attacks, which need to be mitigated by Proof of Ownership (PoW), and high computing overhead of the cloud. So, reducing the latency and the overheads of network and cloud while ensuring security has become a significant challenge for secure data deduplication in cloud storage. 
arXiv:2501.02189 [pdf, other] cs.CV, cs.AI, cs.CL, cs.LG, cs.RO
Benchmark Evaluations, Applications, and Challenges of Large Vision Language Models: A Survey
Authors: Zongxia Li, Xiyang Wu, Hongyang Du, Huy Nghiem, Guangyao Shi
Abstract: Multimodal Vision Language Models (VLMs) have emerged as a transformative technology at the intersection of computer vision and natural language processing, enabling machines to perceive and reason about the world through both visual and textual modalities. For example, models such as CLIP, Claude, and GPT-4V demonstrate strong reasoning and understanding abilities on visual and textual data and beat classical single-modality vision models on zero-shot classification. Despite their rapid advancement in research and growing popularity in applications, a comprehensive survey of existing studies on VLMs is notably lacking, particularly for researchers aiming to leverage VLMs in their specific domains. To this end, we provide a systematic overview of VLMs covering: model information for the major VLMs developed over the past five years (2019-2024); the main architectures and training methods of these VLMs; a summary and categorization of the popular benchmarks and evaluation metrics for VLMs; the applications of VLMs, including embodied agents, robotics, and video generation; and the challenges and issues faced by current VLMs, such as hallucination, fairness, and safety. Detailed collections, including papers and model repository links, are listed at https://github.com/zli12321/Awesome-VLM-Papers-And-Models.git.
Submitted 28 January, 2025; v1 submitted 3 January, 2025; originally announced January 2025.
Comments: 35 pages, 3 figures.
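To make the zero-shot classification claim concrete, here is one common way to run CLIP zero-shot classification with the Hugging Face transformers wrappers; the image path and label set are placeholders, and this is a generic usage sketch rather than the survey's evaluation setup.

```python
from PIL import Image
import torch
from transformers import CLIPModel, CLIPProcessor

model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

image = Image.open("example.jpg")                      # any local image (placeholder path)
labels = ["a photo of a cat", "a photo of a dog", "a photo of a car"]

inputs = processor(text=labels, images=image, return_tensors="pt", padding=True)
with torch.no_grad():
    outputs = model(**inputs)

probs = outputs.logits_per_image.softmax(dim=-1)       # image-text similarity -> class probabilities
print(dict(zip(labels, probs[0].tolist())))
```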
arXiv:2501.01141 [pdf, ps, other] cs.NI
Embodied AI-Enhanced Vehicular Networks: An Integrated Large Language Models and Reinforcement Learning Method
Authors: Ruichen Zhang, Changyuan Zhao, Hongyang Du, Dusit Niyato, Jiacheng Wang, Suttinee Sawadsitang, Xuemin Shen, Dong In Kim
Abstract: This paper investigates adaptive transmission strategies in embodied AI-enhanced vehicular networks by integrating large language models (LLMs) for semantic information extraction and deep
reinforcement learning (DRL) for decision-making. The proposed framework aims to optimize both data transmission efficiency and decision accuracy by formulating an optimization problem that incorporates the Weber-Fechner law as a metric for balancing bandwidth utilization and quality of experience (QoE). Specifically, we employ the large language and vision assistant (LLAVA) model to extract critical semantic information from raw image data captured by embodied AI agents (i.e., vehicles), reducing transmission data size by more than 90% while retaining essential content for vehicular communication and decision-making. In the dynamic vehicular environment, we employ a generalized advantage estimation-based proximal policy optimization (GAE-PPO) method to stabilize decision-making under uncertainty. Simulation results show that attention maps from LLAVA highlight the model's focus on relevant image regions, enhancing semantic representation accuracy. Additionally, our proposed transmission strategy improves QoE by up to 36% compared to DDPG and accelerates convergence by reducing the required steps by up to 47% compared to pure PPO. Further analysis indicates that adapting semantic symbol length provides an effective trade-off between transmission quality and bandwidth, achieving up to a 61.4% improvement in QoE when scaling from 4 to 8 vehicles.
Submitted 2 January, 2025; originally announced January 2025.
Comments: 14 pages, 10 figures.
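For readers unfamiliar with the GAE part of GAE-PPO, the following is the standard generalized advantage estimation recursion in a generic form; it is not the paper's implementation, and the reward and value arrays are toy placeholders.

```python
import numpy as np

def gae_advantages(rewards, values, gamma=0.99, lam=0.95):
    """Standard generalized advantage estimation (GAE) over one episode.

    rewards: r_0..r_{T-1}; values: V(s_0)..V(s_T) (one extra bootstrap value).
    """
    T = len(rewards)
    advantages = np.zeros(T)
    gae = 0.0
    for t in reversed(range(T)):
        delta = rewards[t] + gamma * values[t + 1] - values[t]   # TD residual
        gae = delta + gamma * lam * gae                          # exponentially weighted sum of residuals
        advantages[t] = gae
    returns = advantages + values[:-1]                           # targets for the value function
    return advantages, returns

adv, ret = gae_advantages(np.array([1.0, 0.5, 0.0]), np.array([0.8, 0.6, 0.3, 0.0]))
print(adv, ret)
```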
arXiv:2412.16504 [pdf, ps, other] cs.AI
Privacy in Fine-tuning Large Language Models: Attacks, Defenses, and Future Directions
Authors: Hao Du, Shang Liu, Lele Zheng, Yang Cao, Atsuyoshi Nakamura, Lei Chen
Abstract: Fine-tuning has emerged as a critical process in leveraging Large Language Models (LLMs) for specific downstream tasks, enabling these models to achieve state-of-the-art performance across various domains. However, the fine-tuning process often involves sensitive datasets, introducing privacy risks that exploit the unique characteristics of this stage. In this paper, we provide a comprehensive survey of privacy challenges associated with fine-tuning LLMs, highlighting vulnerabilities to various privacy attacks, including membership inference, data extraction, and backdoor attacks. We further review defense mechanisms designed to mitigate privacy risks in the fine-tuning phase, such as differential privacy, federated learning, and knowledge unlearning, discussing their effectiveness and limitations in addressing privacy risks and maintaining model utility. By identifying key gaps in existing research, we highlight challenges and propose directions to advance the development of privacy-preserving methods for fine-tuning LLMs, promoting their responsible use in diverse applications.
Submitted 21 December, 2024; originally announced December 2024.
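The simplest membership inference attack mentioned in such surveys is a loss threshold test; the sketch below assumes a Hugging Face-style causal LM and tokenizer, and the fixed threshold is a placeholder that would normally be calibrated on known non-member text.

```python
import torch

def sequence_loss(model, tokenizer, text: str) -> float:
    """Average token-level cross-entropy of `text` under a causal language model."""
    ids = tokenizer(text, return_tensors="pt").input_ids
    with torch.no_grad():
        out = model(ids, labels=ids)   # standard causal-LM loss over the sequence
    return out.loss.item()

def loss_threshold_attack(model, tokenizer, text: str, threshold: float = 2.0) -> bool:
    """Flag `text` as a likely fine-tuning member if the model is unusually confident on it."""
    return sequence_loss(model, tokenizer, text) < threshold
```

Defenses reviewed in the paper, such as differential privacy during fine-tuning, aim precisely to shrink the loss gap this kind of attack exploits.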
arXiv:2412.13549 [pdf, other] cs.CL, cs.AI, cs.LG
EscapeBench: Pushing Language Models to Think Outside the Box
Authors: Cheng Qian, Peixuan Han, Qinyu Luo, Bingxiang He, Xiusi Chen, Yuji Zhang, Hongyi Du, Jiarui Yao, Xiaocheng Yang, Denghui Zhang, Yunzhu Li, Heng Ji
Abstract: Language model agents excel in long-session planning and reasoning, but existing benchmarks primarily focus on goal-oriented tasks with explicit objectives, neglecting creative adaptation in unfamiliar environments. To address this, we introduce EscapeBench, a benchmark suite of room escape game environments designed to challenge agents with creative reasoning, unconventional tool use, and iterative problem-solving to uncover implicit goals.
Our results show that current language models, despite employing working memory and Chain-of-Thought reasoning, achieve only 15% average progress without hints, highlighting their limitations in creativity. To bridge this gap, we propose EscapeAgent, a framework designed to enhance creative reasoning through Foresight (innovative tool use) and Reflection (identifying unsolved tasks). Experiments show that EscapeAgent can execute action chains of over 1,000 steps while maintaining logical coherence. It navigates and completes games with up to 40% fewer steps and hints, performs robustly across varying difficulty levels, and achieves higher action success rates with more efficient and innovative puzzle-solving strategies. All data and code are released.
Submitted 18 December, 2024; originally announced December 2024.
Comments: 23 pages, 15 figures.

arXiv:2412.13441 [pdf, other] cs.CV, cs.AI, cs.CL
FlashVTG: Feature Layering and Adaptive Score Handling Network for Video Temporal Grounding
Authors: Zhuo Cao, Bingqing Zhang, Heming Du, Xin Yu, Xue Li, Sen Wang
Abstract: Text-guided Video Temporal Grounding (VTG) aims to localize relevant segments in untrimmed videos based on textual descriptions, encompassing two subtasks: Moment Retrieval (MR) and Highlight Detection (HD). Although previous typical methods have achieved commendable results, it is still challenging to retrieve short video moments.
This is primarily due to the reliance on sparse and limited decoder queries, which significantly constrains prediction accuracy. Furthermore, suboptimal outcomes often arise because previous methods rank predictions in isolation, neglecting the broader video context. To tackle these issues, we introduce FlashVTG, a framework featuring a Temporal Feature Layering (TFL) module and an Adaptive Score Refinement (ASR) module. The TFL module replaces the traditional decoder structure to capture nuanced video content variations across multiple temporal scales, while the ASR module improves prediction ranking by integrating context from adjacent moments and multi-temporal-scale features. Extensive experiments demonstrate that FlashVTG achieves state-of-the-art performance on four widely adopted datasets in both MR and HD. Specifically, on the QVHighlights dataset, it boosts mAP by 5.8% for MR and 3.3% for HD. For short-moment retrieval, FlashVTG increases mAP to 125% of previous SOTA performance. All these improvements are made without adding training burdens, underscoring its effectiveness. Our code is available at https://github.com/Zhuo-Cao/FlashVTG.
Submitted 17 December, 2024; originally announced December 2024.
Comments: Accepted to WACV 2025.

arXiv:2412.08642 [pdf, other] cs.IT, cs.LG, cs.NI
Generative Semantic Communication: Architectures, Technologies, and Applications
Authors: Jinke Ren, Yaping Sun, Hongyang Du, Weiwen Yuan, Chongjie Wang, Xianda Wang, Yingbin Zhou, Ziwei Zhu, Fangxin Wang, Shuguang Cui
Abstract: This paper delves into the applications of generative artificial intelligence (GAI) in semantic communication (SemCom) and presents a thorough study. Three popular SemCom systems enabled by classical GAI models are first introduced, including variational autoencoders, generative adversarial networks, and diffusion models. For each system, the fundamental concept of the GAI model, the corresponding SemCom architecture, and a review of recent related efforts are elucidated. Then, a novel generative SemCom system is proposed by incorporating the cutting-edge GAI technology of large language models (LLMs). This system features two LLM-based AI agents at the transmitter and receiver, serving as "brains" that enable powerful information understanding and content regeneration capabilities, respectively.
This innovative design allows the receiver to directly generate the desired content, instead of recovering the bit stream, based on the coded semantic information conveyed by the transmitter. Therefore, it shifts the communication mindset from "information recovery" to "information regeneration" and thus ushers in a new era of generative SemCom. A case study on point-to-point video retrieval is presented to demonstrate the superiority of the proposed generative SemCom system, showcasing a 99.98% reduction in communication overhead and a 53% improvement in retrieval accuracy compared to the traditional communication system. Furthermore, four typical application scenarios for generative SemCom are delineated, followed by a discussion of three open issues warranting future investigation. In a nutshell, this paper provides a holistic set of guidelines for applying GAI in SemCom, paving the way for the efficient implementation of generative SemCom in future wireless networks.
Submitted 11 December, 2024; originally announced December 2024.
Comments: 18 pages, 8 figures.

arXiv:2412.07183 [pdf, other] cs.CV, cs.AI
Exploring What Why and How: A Multifaceted Benchmark for Causation Understanding of Video Anomaly
Authors: Hang Du, Guoshun Nan, Jiawen Qian, Wangchenhui Wu, Wendi Deng, Hanqing Mu, Zhenyan Chen, Pengxuan Mao, Xiaofeng Tao, Jun Liu
Abstract: Recent advancements in video anomaly understanding (VAU) have opened the door to groundbreaking applications in various fields, such as traffic monitoring and industrial automation.
While the current benchmarks in VAU predominantly emphasize the detection and localization of anomalies, we endeavor here to delve deeper into the practical aspects of VAU by addressing the essential questions: "what anomaly occurred?", "why did it happen?", and "how severe is this abnormal event?". In pursuit of these answers, we introduce a comprehensive benchmark for Exploring the Causation of Video Anomalies (ECVA). Our benchmark is meticulously designed, with each video accompanied by detailed human annotations. Specifically, each instance of ECVA carries three sets of human annotations indicating the "what", "why", and "how" of an anomaly: 1) anomaly type, start and end times, and event descriptions; 2) natural language explanations for the cause of the anomaly; and 3) free text reflecting the effect of the abnormality. Building upon this foundation, we propose a novel prompt-based methodology that serves as a baseline for tackling the intricate challenges posed by ECVA. We utilize a "hard prompt" to guide the model to focus on the critical parts related to video anomaly segments, and a "soft prompt" to establish temporal and spatial relationships within these anomaly segments. Furthermore, we propose AnomEval, a specialized evaluation metric crafted to align closely with human judgment criteria for ECVA. This metric leverages the unique features of the ECVA dataset to provide a more comprehensive and reliable assessment of various video large language models. We demonstrate the efficacy of our approach through rigorous experimental analysis and delineate possible avenues for further investigation into the comprehension of video anomaly causation.
Submitted 9 December, 2024; originally announced December 2024.
Comments: Submitted to IEEE Transactions on Pattern Analysis and Machine Intelligence.
arXiv admin note: substantial text overlap with arXiv:2405.00181.

arXiv:2412.06088 [pdf, other] cs.CV, cs.AI
A4-Unet: Deformable Multi-Scale Attention Network for Brain Tumor Segmentation
Authors: Ruoxin Wang, Tianyi Tang, Haiming Du, Yuxuan Cheng, Yu Wang, Lingjie Yang, Xiaohui Duan, Yunfang Yu, Yu Zhou, Donglong Chen
Abstract: Brain tumor segmentation models have aided diagnosis in recent years. However, they face challenges from MRI complexity and variability, including irregular shapes and unclear boundaries, leading to noise, misclassification, and incomplete segmentation, thereby limiting accuracy. To address these issues, we adhere to an outstanding Convolutional Neural Network (CNN) design paradigm and propose a novel network named A4-Unet. In A4-Unet, Deformable Large Kernel Attention (DLKA) is incorporated in the encoder, allowing improved capture of multi-scale tumors. Swin Spatial Pyramid Pooling (SSPP) with cross-channel attention is employed in the bottleneck to further study long-distance dependencies within images and channel relationships. To enhance accuracy, a Combined Attention Module (CAM) with Discrete Cosine Transform (DCT) orthogonality for channel weighting and convolutional element-wise multiplication for spatial weighting is introduced in the decoder.
Attention gates (AG) are added in the skip connections to highlight the foreground while suppressing irrelevant background information. The proposed network is evaluated on three authoritative MRI brain tumor benchmarks and a proprietary dataset, and it achieves a 94.4% Dice score on the BraTS 2020 dataset, thereby establishing multiple new state-of-the-art benchmarks. The code is available at https://github.com/WendyWAAAAANG/A4-Unet.
Submitted 8 December, 2024; originally announced December 2024.
Comments: 8 pages, 14 figures, IEEE International Conference on Bioinformatics and Biomedicine (BIBM) 2024.
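For context on the attention-gate idea, here is a generic additive attention gate of the Attention U-Net style in PyTorch; channel sizes are arbitrary and this is not necessarily A4-Unet's exact gate design.

```python
import torch
import torch.nn as nn

class AttentionGate(nn.Module):
    """Generic additive attention gate for a U-Net skip connection."""
    def __init__(self, skip_ch: int, gate_ch: int, inter_ch: int):
        super().__init__()
        self.theta = nn.Conv2d(skip_ch, inter_ch, kernel_size=1)   # project encoder skip features
        self.phi = nn.Conv2d(gate_ch, inter_ch, kernel_size=1)     # project decoder (gating) features
        self.psi = nn.Conv2d(inter_ch, 1, kernel_size=1)           # per-pixel attention coefficient
        self.act = nn.ReLU(inplace=True)

    def forward(self, skip: torch.Tensor, gate: torch.Tensor) -> torch.Tensor:
        attn = torch.sigmoid(self.psi(self.act(self.theta(skip) + self.phi(gate))))
        return skip * attn          # foreground highlighted, background suppressed

x = torch.randn(1, 64, 32, 32)      # encoder skip features
g = torch.randn(1, 64, 32, 32)      # upsampled decoder features at the same resolution
print(AttentionGate(64, 64, 32)(x, g).shape)   # torch.Size([1, 64, 32, 32])
```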
arXiv:2412.06007 [pdf, other] cs.NI
Hallucination-aware Optimization for Large Language Model-empowered Communications
Authors: Yinqiu Liu, Guangyuan Liu, Ruichen Zhang, Dusit Niyato, Zehui Xiong, Dong In Kim, Kaibin Huang, Hongyang Du
Abstract: Large Language Models (LLMs) have significantly advanced communications fields such as Telecom Q&A, mathematical modeling, and coding. However, LLMs encounter an inherent issue known as hallucination, i.e., generating fact-conflicting or irrelevant content. This problem critically undermines the applicability of LLMs in communication systems yet has not been systematically explored. Hence, this paper provides a comprehensive review of LLM applications in communications, with a particular emphasis on hallucination mitigation. Specifically, we analyze hallucination causes and summarize hallucination mitigation strategies from both model- and system-based perspectives. Afterward, we review representative LLM-empowered communication schemes, detailing potential hallucination scenarios and comparing the mitigation strategies they adopt. Finally, we present a case study of a Telecom-oriented LLM that utilizes a novel hybrid approach to enhance the hallucination-aware service experience. On the model side, we publish a Telecom hallucination dataset and apply direct preference optimization to fine-tune LLMs, resulting in a 20.6% improvement in correct rate. Moreover, we construct a mobile-edge mixture-of-experts architecture for optimal LLM expert activation. Our research aims to propel the field of LLM-empowered communications forward by detecting and minimizing hallucination impacts.
Submitted 8 December, 2024; originally announced December 2024.
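Since the case study relies on direct preference optimization, the sketch below shows the standard DPO loss in its generic form; the log-probability tensors are toy stand-ins, and this is not the paper's training code.

```python
import torch
import torch.nn.functional as F

def dpo_loss(logp_chosen, logp_rejected, ref_logp_chosen, ref_logp_rejected, beta=0.1):
    """Standard direct preference optimization (DPO) loss.

    Each argument is the summed log-probability of a response under the policy
    being fine-tuned (logp_*) or under the frozen reference model (ref_logp_*).
    """
    chosen_ratio = logp_chosen - ref_logp_chosen
    rejected_ratio = logp_rejected - ref_logp_rejected
    return -F.logsigmoid(beta * (chosen_ratio - rejected_ratio)).mean()

# Toy tensors standing in for batch log-probabilities of preferred vs. dispreferred answers.
loss = dpo_loss(torch.tensor([-12.0]), torch.tensor([-15.0]),
                torch.tensor([-13.0]), torch.tensor([-14.0]))
print(loss.item())
```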
arXiv:2412.05949 [pdf, other] cs.DC, cs.CR
Dual UAV Cluster-Assisted Maritime Physical Layer Secure Communications via Collaborative Beamforming
Authors: Jiawei Huang, Aimin Wang, Geng Sun, Jiahui Li, Jiacheng Wang, Hongyang Du, Dusit Niyato
Abstract: Unmanned aerial vehicles (UAVs) can be utilized as relay platforms to assist maritime wireless communications. However, complex channels and multipath effects at sea can adversely affect the quality of UAV-transmitted signals. Collaborative beamforming (CB) can enhance the signal strength and range to assist the UAV relay for remote maritime communications. However, due to the open nature of UAV channels, security issues require special consideration. This paper proposes a dual UAV cluster-assisted system via CB to achieve physical layer security in maritime wireless communications. Specifically, one UAV cluster forms a maritime UAV-enabled virtual antenna array (MUVAA) relay to forward data signals to the remote legitimate vessel, and the other UAV cluster forms an MUVAA jammer to send jamming signals to the remote eavesdropper. In this system, we formulate a secure and energy-efficient maritime communication multi-objective optimization problem (SEMCMOP) to maximize the signal-to-interference-plus-noise ratio (SINR) of the legitimate vessel, minimize the SINR of the eavesdropping vessel, and minimize the total flight energy consumption of the UAVs. Since the SEMCMOP is an NP-hard and large-scale optimization problem, we propose an improved swarm intelligence optimization algorithm with chaotic solution initialization and hybrid solution update strategies to solve it. Simulation results indicate that the proposed algorithm outperforms the comparison algorithms and achieves more efficient signal transmission with the CB-based method.
Submitted 8 December, 2024; originally announced December 2024.
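Chaotic solution initialization in swarm optimizers is commonly done with a logistic map; the sketch below shows that generic idea, with population size, dimensionality, and bounds as placeholder values rather than the paper's settings.

```python
import numpy as np

def chaotic_init(pop_size: int, dim: int, lower: float, upper: float, mu: float = 4.0):
    """Initialize a swarm with a logistic map instead of independent uniform draws.

    x_{k+1} = mu * x_k * (1 - x_k) generates chaotic values in (0, 1) that tend to
    cover the search space more evenly than purely random initialization.
    """
    x = np.full(dim, 0.7)                      # seed in (0, 1), away from the map's fixed points
    population = np.empty((pop_size, dim))
    for i in range(pop_size):
        x = mu * x * (1 - x)                   # one logistic-map step per individual
        population[i] = lower + x * (upper - lower)
    return population

print(chaotic_init(pop_size=5, dim=3, lower=-100.0, upper=100.0))
```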
arXiv:2412.03621 [pdf, other] cs.NI
Network-aided Efficient Large Language Model Services With Denoising-inspired Prompt Compression
Authors: Feiran You, Hongyang Du, Kaibin Huang, Abbas Jamalipour
Abstract: Large Language Models (LLMs) have demonstrated remarkable capabilities in various tasks, leading to their increasing adoption in diverse services delivered through wireless networks. There is a growing trend toward longer prompts to better leverage LLMs' capabilities and address difficult tasks. However, longer prompts not only increase data transmission costs over wireless links but also require more computing resources and processing time, impacting overall system efficiency and user experience. To address this challenge, we propose Joint Power and Prompt Optimization (JPPO), a framework that combines Small Language Model (SLM)-based prompt compression with wireless power allocation optimization. By deploying an SLM at edge devices for prompt compression and employing Deep Reinforcement Learning (DRL) for joint optimization of the compression ratio and transmission power, JPPO effectively balances service quality with resource efficiency. Furthermore, inspired by denoising diffusion models, we design a denoising-inspired prompt compression approach that iteratively compresses prompts by gradually removing non-critical information. Experimental results demonstrate that our framework achieves high service fidelity while optimizing power usage in wireless LLM services, reducing the total service response time. With our DRL-based JPPO, the framework maintains fidelity comparable to the no-compression baseline while still achieving a 17% service time reduction through adaptive compression.
When prioritizing compression, our framework achieves up to a 16x compression ratio while maintaining acceptable fidelity (within a 30% reduction). Compared to no compression, baseline single-round compression with a 16x compression ratio reduces the total system response time by approximately 42.3%, while the denoising-inspired method achieves a 46.5% service time saving.
Submitted 4 December, 2024; originally announced December 2024.
Comments: arXiv admin note: substantial text overlap with arXiv:2411.18010.
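To make the "iteratively remove non-critical information" idea concrete, here is a deliberately crude toy: each round drops the least important tokens until a target ratio is reached. The importance function is a placeholder (word length here); a real system would use an SLM's token-level saliency or perplexity signal, and this is not the paper's compressor.

```python
def iterative_compress(prompt: str, importance, keep_ratio: float = 0.5, rounds: int = 4):
    """Toy iterative prompt compression: each round drops the least important tokens."""
    tokens = prompt.split()
    per_round = (1.0 / keep_ratio) ** (1.0 / rounds)        # spread the target ratio over rounds
    for _ in range(rounds):
        target = max(1, int(len(tokens) / per_round))
        kept = set(sorted(tokens, key=importance, reverse=True)[:target])
        tokens = [t for t in tokens if t in kept][:target]   # keep surviving tokens in original order
    return " ".join(tokens)

prompt = "please summarise the attached quarterly report focusing on revenue growth and churn"
print(iterative_compress(prompt, importance=len))            # crude proxy: longer words are kept
```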
arXiv:2411.18010 [pdf, other] eess.AS, cs.CL, cs.SD
JPPO: Joint Power and Prompt Optimization for Accelerated Large Language Model Services
Authors: Feiran You, Hongyang Du, Kaibin Huang, Abbas Jamalipour
Abstract: Large Language Models (LLMs) have demonstrated remarkable capabilities in various tasks, leading to their increasing deployment in wireless networks for a wide variety of user services. However, the growing trend toward longer prompts highlights the crucial issues of computational resource demands and heavy communication load. To address this challenge, we propose Joint Power and Prompt Optimization (JPPO), a framework that combines Small Language Model (SLM)-based prompt compression with wireless power allocation optimization. By deploying an SLM at user devices for prompt compression and employing Deep Reinforcement Learning for joint optimization of the compression ratio and transmission power, JPPO effectively balances service quality with resource efficiency. Experimental results demonstrate that our framework achieves high service fidelity and low bit error rates while optimizing power usage in wireless LLM services. The system reduces response time by about 17%, with the improvement varying based on the length of the original prompt.
Submitted 26 November, 2024; originally announced November 2024.

arXiv:2411.16729 [pdf, other] cs.SD, cs.AI, cs.GR, cs.HC, cs.MM, eess.AS
DiM-Gestor: Co-Speech Gesture Generation with Adaptive Layer Normalization Mamba-2
Authors: Fan Zhang, Siyuan Zhao, Naye Ji, Zhaohan Wang, Jingmei Wu, Fuxing Gao, Zhenqing Ye, Leyao Yan, Lanxin Dai, Weidong Geng, Xin Lyu, Bozuo Zhao, Dingguo Yu, Hui Du, Bin Hu
Abstract: Speech-driven gesture generation using transformer-based generative models represents a rapidly advancing area within virtual human creation. However, existing models face significant challenges due to their quadratic time and space complexities, limiting scalability and efficiency. To address these limitations, we introduce DiM-Gestor, an innovative end-to-end generative model leveraging the Mamba-2 architecture. DiM-Gestor features a dual-component framework: (1) a fuzzy feature extractor and (2) a speech-to-gesture mapping module, both built on Mamba-2. The fuzzy feature extractor, integrated with a Chinese pre-trained model and Mamba-2, autonomously extracts implicit, continuous speech features. These features are synthesized into a unified latent representation and then processed by the speech-to-gesture mapping module. This module employs an Adaptive Layer Normalization (AdaLN)-enhanced Mamba-2 mechanism to uniformly apply transformations across all sequence tokens, enabling precise modeling of the nuanced interplay between speech features and gesture dynamics. We utilize a diffusion model to train and infer diverse gesture outputs. Extensive subjective and objective evaluations conducted on the newly released Chinese Co-Speech Gestures dataset corroborate the efficacy of our proposed model. Compared with a Transformer-based architecture, the assessments reveal that our approach delivers competitive results, reduces memory usage by approximately 2.4 times, and enhances inference speed by 2 to 4 times. Additionally, we release the CCG dataset, a Chinese Co-Speech Gestures dataset comprising 15.97 hours (six styles across five scenarios) of 3D full-body skeleton gesture motion performed by professional Chinese TV broadcasters.
Submitted 23 November, 2024; originally announced November 2024.
Comments: 13 pages, 11 figures.
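Adaptive layer normalization of the kind referenced here typically derives a scale and shift from a conditioning vector; the sketch below is that generic construction (as popularized in diffusion transformers), not DiM-Gestor's exact module, and all dimensions are placeholders.

```python
import torch
import torch.nn as nn

class AdaLN(nn.Module):
    """Adaptive layer normalization: scale and shift are predicted from a conditioning vector."""
    def __init__(self, dim: int, cond_dim: int):
        super().__init__()
        self.norm = nn.LayerNorm(dim, elementwise_affine=False)   # no learned affine of its own
        self.to_scale_shift = nn.Linear(cond_dim, 2 * dim)        # condition -> (gamma, beta)

    def forward(self, x: torch.Tensor, cond: torch.Tensor) -> torch.Tensor:
        gamma, beta = self.to_scale_shift(cond).chunk(2, dim=-1)
        return self.norm(x) * (1 + gamma.unsqueeze(1)) + beta.unsqueeze(1)

tokens = torch.randn(2, 16, 64)     # (batch, sequence, channels), e.g. gesture tokens
speech = torch.randn(2, 128)        # conditioning vector, e.g. pooled speech features
print(AdaLN(64, 128)(tokens, speech).shape)   # torch.Size([2, 16, 64])
```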
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 11 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.11929">arXiv:2411.11929</a> <span> [<a href="https://arxiv.org/pdf/2411.11929">pdf</a>, <a href="https://arxiv.org/format/2411.11929">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> ChatHTTPFuzz: Large Language Model-Assisted IoT HTTP Fuzzing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yang%2C+Z">Zhe Yang</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+H">Hao Peng</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Y">Yanling Jiang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xingwei Li</a>, <a href="/search/cs?searchtype=author&query=Du%2C+H">Haohua Du</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shuhai Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jianwei Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.11929v1-abstract-short" style="display: inline;"> Internet of Things (IoT) devices offer convenience through web interfaces, web VPNs, and other web-based services, all relying on the HTTP protocol. However, these externally exposed HTTP services resent significant security risks. Although fuzzing has shown some effectiveness in identifying vulnerabilities in IoT HTTP services, most state-of-the-art tools still rely on random mutation trategies,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11929v1-abstract-full').style.display = 'inline'; document.getElementById('2411.11929v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.11929v1-abstract-full" style="display: none;"> Internet of Things (IoT) devices offer convenience through web interfaces, web VPNs, and other web-based services, all relying on the HTTP protocol. However, these externally exposed HTTP services resent significant security risks. Although fuzzing has shown some effectiveness in identifying vulnerabilities in IoT HTTP services, most state-of-the-art tools still rely on random mutation trategies, leading to difficulties in accurately understanding the HTTP protocol's structure and generating many invalid test cases. Furthermore, These fuzzers rely on a limited set of initial seeds for testing. While this approach initiates testing, the limited number and diversity of seeds hinder comprehensive coverage of complex scenarios in IoT HTTP services. In this paper, we investigate and find that large language models (LLMs) excel in parsing HTTP protocol data and analyzing code logic. Based on these findings, we propose a novel LLM-guided IoT HTTP fuzzing method, ChatHTTPFuzz, which automatically parses protocol fields and analyzes service code logic to generate protocol-compliant test cases. Specifically, we use LLMs to label fields in HTTP protocol data, creating seed templates. 
Second, the LLM analyzes service code to guide the generation of additional packets aligned with the code logic, enriching the seed templates and their field values. Finally, we design an enhanced Thompson sampling algorithm based on the exploration balance factor and mutation potential factor to schedule seed templates. We evaluate ChatHTTPFuzz on 14 different real-world IoT devices. It finds more vulnerabilities than SNIPUZZ, BOOFUZZ, and MUTINY. ChatHTTPFuzz has discovered 103 vulnerabilities, of which 68 are unique, and 23 have been assigned CVEs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11929v1-abstract-full').style.display = 'none'; document.getElementById('2411.11929v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.11340">arXiv:2411.11340</a> <span> [<a href="https://arxiv.org/pdf/2411.11340">pdf</a>, <a href="https://arxiv.org/format/2411.11340">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> A Hybrid Loss Framework for Decomposition-based Time Series Forecasting Methods: Balancing Global and Component Errors </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Han%2C+R">Ronghui Han</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+D">Duanyu Feng</a>, <a href="/search/cs?searchtype=author&query=Du%2C+H">Hongyu Du</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hao Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.11340v1-abstract-short" style="display: inline;"> Accurate time series forecasting, predicting future values based on past data, is crucial for diverse industries. Many current time series methods decompose time series into multiple sub-series, applying different model architectures and training with an end-to-end overall loss for forecasting. However, this raises a question: does this overall loss prioritize the importance of critical sub-series… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11340v1-abstract-full').style.display = 'inline'; document.getElementById('2411.11340v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.11340v1-abstract-full" style="display: none;"> Accurate time series forecasting, predicting future values based on past data, is crucial for diverse industries. Many current time series methods decompose time series into multiple sub-series, applying different model architectures and training with an end-to-end overall loss for forecasting. However, this raises a question: does this overall loss prioritize the importance of critical sub-series within the decomposition for better performance?
To investigate this, we conduct a study on the impact of overall loss on existing time series methods with sequence decomposition. Our findings reveal that overall loss may introduce bias in model learning, hindering the learning of the prioritization of more significant sub-series and limiting the forecasting performance. To address this, we propose a hybrid loss framework combining the global and component losses. This framework introduces component losses for each sub-series alongside the original overall loss. It employs a dual min-max algorithm to dynamically adjust weights between the overall loss and component losses, and within component losses. This enables the model to achieve better performance of current time series methods by focusing on more critical sub-series while still maintaining a low overall loss. We integrate our loss framework into several time series methods and evaluate the performance on multiple datasets. Results show an average improvement of 0.5-2% over existing methods without any modifications to the model architectures. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11340v1-abstract-full').style.display = 'none'; document.getElementById('2411.11340v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.11258">arXiv:2411.11258</a> <span> [<a href="https://arxiv.org/pdf/2411.11258">pdf</a>, <a href="https://arxiv.org/format/2411.11258">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> ESTVocoder: An Excitation-Spectral-Transformed Neural Vocoder Conditioned on Mel Spectrogram </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jiang%2C+X">Xiao-Hang Jiang</a>, <a href="/search/cs?searchtype=author&query=Du%2C+H">Hui-Peng Du</a>, <a href="/search/cs?searchtype=author&query=Ai%2C+Y">Yang Ai</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+Y">Ye-Xin Lu</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+Z">Zhen-Hua Ling</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.11258v1-abstract-short" style="display: inline;"> This paper proposes ESTVocoder, a novel excitation-spectral-transformed neural vocoder within the framework of source-filter theory. The ESTVocoder transforms the amplitude and phase spectra of the excitation into the corresponding speech amplitude and phase spectra using a neural filter whose backbone is ConvNeXt v2 blocks. 
Finally, the speech waveform is reconstructed through the inverse short-t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11258v1-abstract-full').style.display = 'inline'; document.getElementById('2411.11258v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.11258v1-abstract-full" style="display: none;"> This paper proposes ESTVocoder, a novel excitation-spectral-transformed neural vocoder within the framework of source-filter theory. The ESTVocoder transforms the amplitude and phase spectra of the excitation into the corresponding speech amplitude and phase spectra using a neural filter whose backbone is ConvNeXt v2 blocks. Finally, the speech waveform is reconstructed through the inverse short-time Fourier transform (ISTFT). The excitation is constructed based on the F0: for voiced segments, it contains full harmonic information, while for unvoiced segments, it is represented by noise. The excitation provides the filter with prior knowledge of the amplitude and phase patterns, expecting to reduce the modeling difficulty compared to conventional neural vocoders. To ensure the fidelity of the synthesized speech, an adversarial training strategy is applied to ESTVocoder with multi-scale and multi-resolution discriminators. Analysis-synthesis and text-to-speech experiments both confirm that our proposed ESTVocoder outperforms or is comparable to other baseline neural vocoders, e.g., HiFi-GAN, SiFi-GAN, and Vocos, in terms of synthesized speech quality, with a reasonable model complexity and generation speed. Additional analysis experiments also demonstrate that the introduced excitation effectively accelerates the model's convergence process, thanks to the speech spectral prior information contained in the excitation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11258v1-abstract-full').style.display = 'none'; document.getElementById('2411.11258v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
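<p class="is-size-7">The excitation construction described here, full harmonic content for voiced frames and noise for unvoiced frames, can be sketched as follows; the frame hop, sampling rate, and harmonic count are illustrative assumptions rather than the paper's settings.</p>
<pre><code class="language-python">
# Illustrative F0-driven excitation: harmonic sum where voiced, noise where unvoiced.
import numpy as np

def build_excitation(f0, hop, sr, n_harmonics=8):
    # f0: per-frame fundamental frequency in Hz (0 marks unvoiced frames).
    rng = np.random.default_rng(0)
    f0_up = np.repeat(f0, hop)                      # upsample frame-level F0 to sample rate
    voiced = f0_up > 0
    phase = 2 * np.pi * np.cumsum(f0_up) / sr       # running phase of the fundamental
    exc = np.zeros(len(f0_up))
    for k in range(1, n_harmonics + 1):             # sum of harmonics for voiced regions
        exc += np.sin(k * phase)
    return np.where(voiced, exc / n_harmonics, rng.normal(scale=0.3, size=len(f0_up)))

f0 = np.array([0.0] * 20 + [220.0] * 40 + [0.0] * 20)   # toy contour: silence-voiced-silence
excitation = build_excitation(f0, hop=240, sr=24000)
print(excitation.shape)   # per-sample excitation, ready to drive a neural filter
</code></pre>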
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by NCMMSC2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.11232">arXiv:2411.11232</a> <span> [<a href="https://arxiv.org/pdf/2411.11232">pdf</a>, <a href="https://arxiv.org/format/2411.11232">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> SAMOS: A Neural MOS Prediction Model Leveraging Semantic Representations and Acoustic Features </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shi%2C+Y">Yu-Fei Shi</a>, <a href="/search/cs?searchtype=author&query=Ai%2C+Y">Yang Ai</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+Y">Ye-Xin Lu</a>, <a href="/search/cs?searchtype=author&query=Du%2C+H">Hui-Peng Du</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+Z">Zhen-Hua Ling</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.11232v1-abstract-short" style="display: inline;"> Assessing the naturalness of speech using mean opinion score (MOS) prediction models has positive implications for the automatic evaluation of speech synthesis systems. Early MOS prediction models took the raw waveform or amplitude spectrum of speech as input, whereas more advanced methods employed self-supervised-learning (SSL) based models to extract semantic representations from speech for MOS… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11232v1-abstract-full').style.display = 'inline'; document.getElementById('2411.11232v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.11232v1-abstract-full" style="display: none;"> Assessing the naturalness of speech using mean opinion score (MOS) prediction models has positive implications for the automatic evaluation of speech synthesis systems. Early MOS prediction models took the raw waveform or amplitude spectrum of speech as input, whereas more advanced methods employed self-supervised-learning (SSL) based models to extract semantic representations from speech for MOS prediction. These methods utilized limited aspects of speech information for MOS prediction, resulting in restricted prediction accuracy. Therefore, in this paper, we propose SAMOS, a MOS prediction model that leverages both Semantic and Acoustic information of speech to be assessed. Specifically, the proposed SAMOS leverages a pretrained wav2vec2 to extract semantic representations and uses the feature extractor of a pretrained BiVocoder to extract acoustic features. These two types of features are then fed into the prediction network, which includes multi-task heads and an aggregation layer, to obtain the final MOS score. Experimental results demonstrate that the proposed SAMOS outperforms current state-of-the-art MOS prediction models on the BVCC dataset and performs comparable performance on the BC2019 dataset, according to the results of system-level evaluation metrics. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11232v1-abstract-full').style.display = 'none'; document.getElementById('2411.11232v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.11123">arXiv:2411.11123</a> <span> [<a href="https://arxiv.org/pdf/2411.11123">pdf</a>, <a href="https://arxiv.org/format/2411.11123">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Pitch-and-Spectrum-Aware Singing Quality Assessment with Bias Correction and Model Fusion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shi%2C+Y">Yu-Fei Shi</a>, <a href="/search/cs?searchtype=author&query=Ai%2C+Y">Yang Ai</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+Y">Ye-Xin Lu</a>, <a href="/search/cs?searchtype=author&query=Du%2C+H">Hui-Peng Du</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+Z">Zhen-Hua Ling</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.11123v3-abstract-short" style="display: inline;"> We participated in track 2 of the VoiceMOS Challenge 2024, which aimed to predict the mean opinion score (MOS) of singing samples. Our submission secured the first place among all participating teams, excluding the official baseline. In this paper, we further improve our submission and propose a novel Pitch-and-Spectrum-aware Singing Quality Assessment (PS-SQA) method. The PS-SQA is designed based… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11123v3-abstract-full').style.display = 'inline'; document.getElementById('2411.11123v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.11123v3-abstract-full" style="display: none;"> We participated in track 2 of the VoiceMOS Challenge 2024, which aimed to predict the mean opinion score (MOS) of singing samples. Our submission secured the first place among all participating teams, excluding the official baseline. In this paper, we further improve our submission and propose a novel Pitch-and-Spectrum-aware Singing Quality Assessment (PS-SQA) method. The PS-SQA is designed based on the self-supervised-learning (SSL) MOS predictor, incorporating singing pitch and spectral information, which are extracted using pitch histogram and non-quantized neural codec, respectively. Additionally, the PS-SQA introduces a bias correction strategy to address prediction biases caused by low-resource training samples, and employs model fusion technology to further enhance prediction accuracy. Experimental results confirm that our proposed PS-SQA significantly outperforms all competing systems across all system-level metrics, confirming its strong sing quality assessment capabilities. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11123v3-abstract-full').style.display = 'none'; document.getElementById('2411.11123v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09846">arXiv:2411.09846</a> <span> [<a href="https://arxiv.org/pdf/2411.09846">pdf</a>, <a href="https://arxiv.org/format/2411.09846">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Leveraging Propagated Infection to Crossfire Mutants </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Du%2C+H">Hang Du</a>, <a href="/search/cs?searchtype=author&query=Palepu%2C+V+K">Vijay Krishna Palepu</a>, <a href="/search/cs?searchtype=author&query=Jones%2C+J+A">James A. Jones</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.09846v1-abstract-short" style="display: inline;"> Mutation testing was proposed to identify weaknesses in test suites by repeatedly generating artificially faulty versions of the software (mutants) and determining if the test suite is sufficient to detect them (kill them). When the tests are insufficient, each surviving mutant provides an opportunity to improve the test suite. We conducted a study and found that many such surviving mutants (up to… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09846v1-abstract-full').style.display = 'inline'; document.getElementById('2411.09846v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.09846v1-abstract-full" style="display: none;"> Mutation testing was proposed to identify weaknesses in test suites by repeatedly generating artificially faulty versions of the software (mutants) and determining if the test suite is sufficient to detect them (kill them). When the tests are insufficient, each surviving mutant provides an opportunity to improve the test suite. We conducted a study and found that many such surviving mutants (up to 84% for the subjects of our study) are detectable by simply augmenting existing tests with additional assertions, or assertion amplification. Moreover, we find that many of these mutants are detectable by multiple existing tests, giving developers options for how to detect them. To help with these challenges, we created a technique that performs memory-state analysis to identify candidate assertions that developers can use to detect the surviving mutants. Additionally, we build upon prior research that identifies ``crossfiring'' opportunities -- tests that coincidentally kill multiple mutants. 
To this end, we developed a theoretical model that describes the varying granularities at which crossfiring can occur in the existing test suite, which provide opportunities and options for how to kill surviving mutants. We operationalize this model into an accompanying technique that optimizes the assertion amplification of the existing tests to crossfire multiple mutants with fewer added assertions, optionally concentrated within fewer tests. Our experiments show that we can kill all surviving mutants that are detectable with existing test data with only 1.1% of the identified assertion candidates, and increase, by a factor of 6x on average, the number of mutants killed by amplified tests over tests that do not crossfire. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09846v1-abstract-full').style.display = 'none'; document.getElementById('2411.09846v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at ICSE '25</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> D.2.5 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09712">arXiv:2411.09712</a> <span> [<a href="https://arxiv.org/pdf/2411.09712">pdf</a>, <a href="https://arxiv.org/format/2411.09712">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> </div> </div> <p class="title is-5 mathjax"> Digital Twin-Assisted Space-Air-Ground Integrated Multi-Access Edge Computing for Low-Altitude Economy: An Online Decentralized Optimization Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=He%2C+L">Long He</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+G">Geng Sun</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Z">Zemin Sun</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jiacheng Wang</a>, <a href="/search/cs?searchtype=author&query=Du%2C+H">Hongyang Du</a>, <a href="/search/cs?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jiangchuan Liu</a>, <a href="/search/cs?searchtype=author&query=Leung%2C+V+C+M">Victor C. M. Leung</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.09712v2-abstract-short" style="display: inline;"> The emergence of space-air-ground integrated multi-access edge computing (SAGIMEC) networks opens a significant opportunity for the rapidly growing low altitude economy (LAE), facilitating the development of various applications by offering efficient communication and computing services.
However, the heterogeneous nature of SAGIMEC networks, coupled with the stringent computational and communicati… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09712v2-abstract-full').style.display = 'inline'; document.getElementById('2411.09712v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.09712v2-abstract-full" style="display: none;"> The emergence of space-air-ground integrated multi-access edge computing (SAGIMEC) networks opens a significant opportunity for the rapidly growing low altitude economy (LAE), facilitating the development of various applications by offering efficient communication and computing services. However, the heterogeneous nature of SAGIMEC networks, coupled with the stringent computational and communication requirements of diverse applications in the LAE, introduces considerable challenges in integrating SAGIMEC into the LAE. In this work, we first present a digital twin-assisted SAGIMEC paradigm for LAE, where digital twin enables reliable network monitoring and management, while SAGIMEC provides efficient computing offloading services for Internet of Things sensor devices (ISDs). Then, a joint satellite selection, computation offloading, communication resource allocation, computation resource allocation and UAV trajectory control optimization problem (JSC4OP) is formulated to maximize the quality of service (QoS) of ISDs. Given the complexity of JSC4OP, we propose an online decentralized optimization approach (ODOA) to address the problem. Specifically, JSC4OP is first transformed into a real-time decision-making optimization problem (RDOP) by leveraging Lyapunov optimization. Then, to solve the RDOP, we introduce an online learning-based latency prediction method to predict the uncertain system environment and a game theoretic decision-making method to make real-time decisions. Finally, theoretical analysis confirms the effectiveness of the ODOA, while the simulation results demonstrate that the proposed ODOA outperforms other alternative approaches in terms of overall system performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09712v2-abstract-full').style.display = 'none'; document.getElementById('2411.09712v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
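<p class="is-size-7">The Lyapunov step mentioned in this abstract typically turns a long-term constrained problem into per-slot decisions by maintaining virtual queues and minimizing a drift-plus-penalty expression. The toy sketch below shows that bookkeeping with made-up quantities; it is not the paper's JSC4OP formulation.</p>
<pre><code class="language-python">
# Illustrative Lyapunov-style per-slot decision: drift-plus-penalty over a virtual queue.
import numpy as np

rng = np.random.default_rng(0)
V = 50.0              # penalty weight: larger V favours the objective over queue stability
queue = 0.0           # virtual queue tracking violation of a long-term constraint
budget = 1.0          # per-slot average resource budget (made-up constraint)

for t in range(5):
    candidates = rng.uniform(0.2, 2.0, size=8)          # candidate per-slot resource usages
    utility = np.sqrt(candidates)                       # toy utility of each candidate
    # Per-slot objective: queue-weighted usage minus V-weighted utility.
    cost = queue * candidates - V * utility
    choice = candidates[np.argmin(cost)]
    queue = max(queue + choice - budget, 0.0)           # virtual queue update
    print(f"slot {t}: use {choice:.2f}, queue {queue:.2f}")
</code></pre>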
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: text overlap with arXiv:2406.11918</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.08672">arXiv:2411.08672</a> <span> [<a href="https://arxiv.org/pdf/2411.08672">pdf</a>, <a href="https://arxiv.org/format/2411.08672">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Joint Model Caching and Resource Allocation in Generative AI-Enabled Wireless Edge Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zhang Liu</a>, <a href="/search/cs?searchtype=author&query=Du%2C+H">Hongyang Du</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+L">Lianfen Huang</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+Z">Zhibin Gao</a>, <a href="/search/cs?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.08672v1-abstract-short" style="display: inline;"> With the rapid advancement of artificial intelligence (AI), generative AI (GenAI) has emerged as a transformative tool, enabling customized and personalized AI-generated content (AIGC) services. However, GenAI models with billions of parameters require substantial memory capacity and computational power for deployment and execution, presenting significant challenges to resource-limited edge networ… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08672v1-abstract-full').style.display = 'inline'; document.getElementById('2411.08672v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.08672v1-abstract-full" style="display: none;"> With the rapid advancement of artificial intelligence (AI), generative AI (GenAI) has emerged as a transformative tool, enabling customized and personalized AI-generated content (AIGC) services. However, GenAI models with billions of parameters require substantial memory capacity and computational power for deployment and execution, presenting significant challenges to resource-limited edge networks. In this paper, we address the joint model caching and resource allocation problem in GenAI-enabled wireless edge networks. Our objective is to balance the trade-off between delivering high-quality AIGC and minimizing the delay in AIGC service provisioning. To tackle this problem, we employ a deep deterministic policy gradient (DDPG)-based reinforcement learning approach, capable of efficiently determining optimal model caching and resource allocation decisions for AIGC services in response to user mobility and time-varying channel conditions. Numerical results demonstrate that DDPG achieves a higher model hit ratio and provides superior-quality, lower-latency AIGC services compared to other benchmark solutions. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08672v1-abstract-full').style.display = 'none'; document.getElementById('2411.08672v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">conference paper with 6 pages and 5 figures. arXiv admin note: text overlap with arXiv:2411.01458</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06928">arXiv:2411.06928</a> <span> [<a href="https://arxiv.org/pdf/2411.06928">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Multi-class Decoding of Attended Speaker Direction Using Electroencephalogram and Audio Spatial Spectrum </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yuanming Zhang</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+J">Jing Lu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+F">Fei Chen</a>, <a href="/search/cs?searchtype=author&query=Du%2C+H">Haoliang Du</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+X">Xia Gao</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+Z">Zhibin Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06928v2-abstract-short" style="display: inline;"> Decoding the directional focus of an attended speaker from listeners' electroencephalogram (EEG) signals is essential for developing brain-computer interfaces to improve the quality of life for individuals with hearing impairment. Previous works have concentrated on binary directional focus decoding, i.e., determining whether the attended speaker is on the left or right side of the listener. Howev… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06928v2-abstract-full').style.display = 'inline'; document.getElementById('2411.06928v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06928v2-abstract-full" style="display: none;"> Decoding the directional focus of an attended speaker from listeners' electroencephalogram (EEG) signals is essential for developing brain-computer interfaces to improve the quality of life for individuals with hearing impairment. Previous works have concentrated on binary directional focus decoding, i.e., determining whether the attended speaker is on the left or right side of the listener. However, a more precise decoding of the exact direction of the attended speaker is necessary for effective speech processing. 
Additionally, audio spatial information has not been effectively leveraged, resulting in suboptimal decoding results. In this paper, it is found that on the recently presented dataset with 14-class directional focus, models relying exclusively on EEG inputs exhibit significantly lower accuracy when decoding the directional focus in both leave-one-subject-out and leave-one-trial-out scenarios. By integrating audio spatial spectra with EEG features, the decoding accuracy can be effectively improved. The CNN, LSM-CNN, and Deformer models are employed to decode the directional focus from listeners' EEG signals and audio spatial spectra. The proposed Sp-EEG-Deformer model achieves notable 14-class decoding accuracies of 55.35% and 57.19% in leave-one-subject-out and leave-one-trial-out scenarios with a decision window of 1 second, respectively. Experiment results indicate increased decoding accuracy as the number of alternative directions reduces. These findings suggest the efficacy of our proposed dual modal directional focus decoding strategy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06928v2-abstract-full').style.display = 'none'; document.getElementById('2411.06928v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to IEEE TNSRE</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.04137">arXiv:2411.04137</a> <span> [<a href="https://arxiv.org/pdf/2411.04137">pdf</a>, <a href="https://arxiv.org/format/2411.04137">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Generative AI Enabled Matching for 6G Multiple Access </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xudong Wang</a>, <a href="/search/cs?searchtype=author&query=Du%2C+H">Hongyang Du</a>, <a href="/search/cs?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+L">Lijie Zhou</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+L">Lei Feng</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Z">Zhixiang Yang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+F">Fanqin Zhou</a>, <a href="/search/cs?searchtype=author&query=Li%2C+W">Wenjing Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.04137v1-abstract-short" style="display: inline;"> In wireless networks, applying deep learning models to solve matching problems between different entities has become a mainstream 
and effective approach. However, the complex network topology in 6G multiple access presents significant challenges for the real-time performance and stability of matching generation. Generative artificial intelligence (GenAI) has demonstrated strong capabilities in gra… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04137v1-abstract-full').style.display = 'inline'; document.getElementById('2411.04137v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.04137v1-abstract-full" style="display: none;"> In wireless networks, applying deep learning models to solve matching problems between different entities has become a mainstream and effective approach. However, the complex network topology in 6G multiple access presents significant challenges for the real-time performance and stability of matching generation. Generative artificial intelligence (GenAI) has demonstrated strong capabilities in graph feature extraction, exploration, and generation, offering potential for graph-structured matching generation. In this paper, we propose a GenAI-enabled matching generation framework to support 6G multiple access. Specifically, we first summarize the classical matching theory, discuss common GenAI models and applications from the perspective of matching generation. Then, we propose a framework based on generative diffusion models (GDMs) that iteratively denoises toward reward maximization to generate a matching strategy that meets specific requirements. Experimental results show that, compared to decision-based AI approaches, our framework can generate more effective matching strategies based on given conditions and predefined rewards, helping to solve complex problems in 6G multiple access, such as task allocation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04137v1-abstract-full').style.display = 'none'; document.getElementById('2411.04137v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
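<p class="is-size-7">The idea of iteratively refining a matching toward higher reward can be sketched, very loosely, as repeated small perturbations that are kept only when the reward does not drop; this toy stand-in is not the generative diffusion model used in the paper.</p>
<pre><code class="language-python">
# Toy reward-guided iterative refinement of a one-to-one matching (not a real GDM).
import numpy as np

rng = np.random.default_rng(0)
scores = rng.uniform(size=(6, 6))          # made-up user-to-resource utility matrix

def reward(perm):
    return scores[np.arange(len(perm)), perm].sum()

perm = rng.permutation(6)                  # start from a random matching
for step in range(200):
    i, j = rng.choice(6, size=2, replace=False)
    cand = perm.copy()
    cand[i], cand[j] = cand[j], cand[i]    # small perturbation: swap two assignments
    if reward(cand) >= reward(perm):       # keep the perturbation if reward does not drop
        perm = cand
print(perm, round(float(reward(perm)), 3))
</code></pre>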
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages,5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.02863">arXiv:2411.02863</a> <span> [<a href="https://arxiv.org/pdf/2411.02863">pdf</a>, <a href="https://arxiv.org/format/2411.02863">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Programming Languages">cs.PL</span> </div> </div> <p class="title is-5 mathjax"> LoopSCC: Towards Summarizing Multi-branch Loops within Determinate Cycles </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhu%2C+K">Kai Zhu</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+C">Chenkai Guo</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+K">Kuihao Yan</a>, <a href="/search/cs?searchtype=author&query=Jia%2C+X">Xiaoqi Jia</a>, <a href="/search/cs?searchtype=author&query=Du%2C+H">Haichao Du</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Q">Qingjia Huang</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+Y">Yamin Xie</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+J">Jing Tang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.02863v1-abstract-short" style="display: inline;"> Analyzing programs with loops is a challenging task, suffering from potential issues such as indeterminate number of iterations and exponential growth of control flow complexity. Loop summarization, as a static analysis method for concrete semantic interpretation, receives increasing focuses. It produces symbolic expressions semantically equivalent to the loop program. However, current loop summar… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02863v1-abstract-full').style.display = 'inline'; document.getElementById('2411.02863v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.02863v1-abstract-full" style="display: none;"> Analyzing programs with loops is a challenging task, suffering from potential issues such as indeterminate number of iterations and exponential growth of control flow complexity. Loop summarization, as a static analysis method for concrete semantic interpretation, receives increasing focuses. It produces symbolic expressions semantically equivalent to the loop program. However, current loop summarization methods are only suitable for single-branch loops or multi-branch loops with simple cycles, without supporting complex loops with irregular branch-to-branch transitions. In this paper, we proposed LoopSCC, a novel loop summarization technique, to achieve concrete semantic interpretation on complex loop. LoopSCC analyzes the control flow at the granularity of single-loop-path and applies the strongly connected components (SCC for short) for contraction and simplification, resulting in the contracted single-loop-path graph (CSG for short). Based on the control flow information provided by the CSG, we can convert the loop summary into a combination of SCC summaries. 
When an SCC contains irregular branch-to-branch transitions, we propose to explore a convergent range to identify the determinate cycles of different execution paths, referred to as the oscillatory interval. The loop summarization, composed of both iteration conditions and execution operations, can eventually be derived recursively. Extensive experiments comparing LoopSCC to six state-of-the-art loop interpretation methods are conducted to evaluate the effectiveness of LoopSCC. From the results, LoopSCC outperforms comparative methods in both interpretation accuracy and application effectiveness. In particular, LoopSCC achieves 100% interpretation accuracy on a public, commonly used benchmark. A systematic study of loop properties on three large-scale programs illustrates that LoopSCC presents outstanding scalability for real-world loop programs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02863v1-abstract-full').style.display = 'none'; document.getElementById('2411.02863v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.01535">arXiv:2411.01535</a> <span> [<a href="https://arxiv.org/pdf/2411.01535">pdf</a>, <a href="https://arxiv.org/format/2411.01535">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> Customized Subgraph Selection and Encoding for Drug-drug Interaction Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Du%2C+H">Haotong Du</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+Q">Quanming Yao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Juzheng Zhang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yang Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zhen Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.01535v1-abstract-short" style="display: inline;"> Subgraph-based methods have proven to be effective and interpretable in predicting drug-drug interactions (DDIs), which are essential for medical practice and drug development. Subgraph selection and encoding are critical stages in these methods, yet customizing these components remains underexplored due to the high cost of manual adjustments.
In this study, inspired by the success of neural archi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01535v1-abstract-full').style.display = 'inline'; document.getElementById('2411.01535v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.01535v1-abstract-full" style="display: none;"> Subgraph-based methods have proven to be effective and interpretable in predicting drug-drug interactions (DDIs), which are essential for medical practice and drug development. Subgraph selection and encoding are critical stages in these methods, yet customizing these components remains underexplored due to the high cost of manual adjustments. In this study, inspired by the success of neural architecture search (NAS), we propose a method to search for data-specific components within subgraph-based frameworks. Specifically, we introduce extensive subgraph selection and encoding spaces that account for the diverse contexts of drug interactions in DDI prediction. To address the challenge of large search spaces and high sampling costs, we design a relaxation mechanism that uses an approximation strategy to efficiently explore optimal subgraph configurations. This approach allows for robust exploration of the search space. Extensive experiments demonstrate the effectiveness and superiority of the proposed method, with the discovered subgraphs and encoding functions highlighting the model's adaptability. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01535v1-abstract-full').style.display = 'none'; document.getElementById('2411.01535v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
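<p class="is-size-7">The relaxation over discrete subgraph-encoding choices resembles the common trick of mixing candidate operations with softmax weights over architecture parameters; the candidate encoders below are placeholders meant only to illustrate that mixture, not the search space proposed in the paper.</p>
<pre><code class="language-python">
# Illustrative continuous relaxation over candidate subgraph encoders (placeholder ops).
import numpy as np

def softmax(a):
    e = np.exp(a - a.max())
    return e / e.sum()

# Placeholder candidate encoders operating on a pooled subgraph feature vector.
candidates = [
    lambda h: np.tanh(h),            # e.g. a nonlinear encoder
    lambda h: h,                     # identity / skip
    lambda h: np.maximum(h, 0.0),    # ReLU-style encoder
]

alpha = np.zeros(len(candidates))    # architecture parameters, learned in practice
h = np.linspace(-1.0, 1.0, 8)        # toy subgraph representation

weights = softmax(alpha)
mixed = sum(w * op(h) for w, op in zip(weights, candidates))   # soft mixture of the choices
print(np.round(mixed, 3))
</code></pre>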
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by NeurIPS 2024</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Du%2C+H&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Du%2C+H&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Du%2C+H&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Du%2C+H&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Du%2C+H&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Du%2C+H&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li> <a href="/search/?searchtype=author&query=Du%2C+H&start=250" class="pagination-link " aria-label="Page 6" aria-current="page">6 </a> </li> <li> <a href="/search/?searchtype=author&query=Du%2C+H&start=300" class="pagination-link " aria-label="Page 7" aria-current="page">7 </a> </li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a 
href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>