<!-- crawl artifact removed: stray site marker and duplicated page title preceded the doctype, which is invalid HTML -->
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script 
src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. 
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" 
role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 86 results for author: <span class="mathjax">Jiang, T</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/eess" aria-role="search"> Searching in archive <strong>eess</strong>. <a href="/search/?searchtype=author&amp;query=Jiang%2C+T">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Jiang, T"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Jiang%2C+T&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option 
value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Jiang, T"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Jiang%2C+T&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Jiang%2C+T&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Jiang%2C+T&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.11479">arXiv:2412.11479</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.11479">pdf</a>, <a href="https://arxiv.org/format/2412.11479">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div 
class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/GCWkshps58843.2023.10464958">10.1109/GCWkshps58843.2023.10464958 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Wireless Environmental Information Theory: A New Paradigm towards 6G Online and Proactive Environment Intelligence Communication </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+J">Jianhua Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Yu%2C+L">Li Yu</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+S">Shaoyi Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Cai%2C+Y">Yichen Cai</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+Y">Yuxiang Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Xing%2C+H">Hongbo Xing</a>, <a href="/search/eess?searchtype=author&amp;query=jiang%2C+T">Tao jiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.11479v1-abstract-short" style="display: inline;"> The channel is one of the five critical components of a communication system, and its ergodic capacity is based on all realizations of statistic channel model. This statistical paradigm has successfully guided the design of mobile communication systems from 1G to 5G. 
However, this approach relies on offline channel measurements in specific environments, and the system passively adapts to new envir&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.11479v1-abstract-full').style.display = 'inline'; document.getElementById('2412.11479v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.11479v1-abstract-full" style="display: none;"> The channel is one of the five critical components of a communication system, and its ergodic capacity is based on all realizations of statistic channel model. This statistical paradigm has successfully guided the design of mobile communication systems from 1G to 5G. However, this approach relies on offline channel measurements in specific environments, and the system passively adapts to new environments, resulting in deviation from the optimal performance. With the pursuit of higher capacity and data rate of 6G, especially facing the ubiquitous environments, there is an urgent need for a new paradigm to combat the randomness of channel, i.e., more proactive and online manner. Motivated by this, we propose an environment intelligence communication (EIC) based on wireless environmental information theory (WEIT) for 6G. The proposed EIC architecture is composed of three steps: Firstly, wireless environmental information (WEI) is acquired using sensing techniques. Then, leveraging WEI and channel data, AI techniques are employed to predict channel fading, thereby mitigating channel uncertainty. Thirdly, the communication system autonomously determines the optimal air-interface transmission strategy based on real-time channel predictions, enabling intelligent interaction with the physical environment. To make this attractive paradigm shift from theory to practice, we answer three key problems to establish WEIT for the first time. How should WEI be defined? Can it be quantified? 
Does it hold the same properties as statistical communication information? Furthermore, EIC aided by WEI (EIC-WEI) is validated across multiple air-interface tasks, including CSI prediction, beam prediction, and radio resource management. Simulation results demonstrate that the proposed EIC-WEI significantly outperforms the statistical paradigm in decreasing overhead and performance optimization. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.11479v1-abstract-full').style.display = 'none'; document.getElementById('2412.11479v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.08752">arXiv:2412.08752</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.08752">pdf</a>, <a href="https://arxiv.org/format/2412.08752">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Experimental Analysis and Modeling of Penetration Loss for Building Materials in FR1 and FR3 bands </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Liu%2C+E">Enrui Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Tang%2C+P">Pan Tang</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Tao Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+J">Jianhua Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" 
id="2412.08752v1-abstract-short" style="display: inline;"> This study focuses on analysis and modeling of the penetration loss of typical building materials in the FR1 (450 MHz-6 GHz) and FR3 (7-24 GHz) bands based on experimental measurements. Firstly, we measure the penetration loss characteristics of four different typical building materials from 4 to 16 GHz, including wood, glass, foam and concrete, by using a penetration loss measurement platform bas&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.08752v1-abstract-full').style.display = 'inline'; document.getElementById('2412.08752v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.08752v1-abstract-full" style="display: none;"> This study focuses on analysis and modeling of the penetration loss of typical building materials in the FR1 (450 MHz-6 GHz) and FR3 (7-24 GHz) bands based on experimental measurements. Firstly, we measure the penetration loss characteristics of four different typical building materials from 4 to 16 GHz, including wood, glass, foam and concrete, by using a penetration loss measurement platform based on the vector network analyzer (VNA). Next, we analyze the frequency dependence and thickness dependence of penetration loss. Finally, the linear model is applied to fit the curve of the measured penetration loss, and new model parameters for the penetration loss of different building materials are given, which are compared with that in the third generation partnership project (3GPP) technical report (TR) 38.901. The analysis results and new model parameters may provides insight into understanding propagation characteristics in FR1 and FR3 bands and 3GPP channel model standardisation. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.08752v1-abstract-full').style.display = 'none'; document.getElementById('2412.08752v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 4 figures, conference</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.07356">arXiv:2412.07356</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.07356">pdf</a>, <a href="https://arxiv.org/format/2412.07356">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Cascaded channel modeling and experimental validation for RIS assisted communication system </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+J">Jiwei Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+Y">Yuxiang Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Tao Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Gong%2C+H">Huiwen Gong</a>, <a href="/search/eess?searchtype=author&amp;query=Xing%2C+H">Hongbo Xing</a>, <a href="/search/eess?searchtype=author&amp;query=Tian%2C+L">Lei Tian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.07356v1-abstract-short" style="display: inline;"> 
Reconfigurable Intelligent Surface (RIS) is considered as a promising technology for 6G due to its ability to actively modify the electromagnetic propagation environment. Accurate channel modeling is essential for the design and evaluation of RIS assisted communication systems. Most current research models the RIS channel as a cascade of Tx-RIS and RIS-Rx sub-channels. However, most validation eff&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.07356v1-abstract-full').style.display = 'inline'; document.getElementById('2412.07356v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.07356v1-abstract-full" style="display: none;"> Reconfigurable Intelligent Surface (RIS) is considered as a promising technology for 6G due to its ability to actively modify the electromagnetic propagation environment. Accurate channel modeling is essential for the design and evaluation of RIS assisted communication systems. Most current research models the RIS channel as a cascade of Tx-RIS and RIS-Rx sub-channels. However, most validation efforts regarding this assumption focus on large-scale path loss. To further explore this, in this paper, we derive and extend a convolution expression of RIS cascaded channel model based on the previously proposed Geometry-based Stochastic Model (GBSM)-based RIS cascaded channels. This model follows the 3GPP standard framework and leverages parameters such as angles, delays, and path powers defined in the GBSM model to more accurately reflect the smallscale characteristics of RIS multipath cascades. To verify the accuracy of this model, we conduct measurements of the TxRIS-Rx channel, Tx-RIS, and RIS-Rx sub-channels in a factory environment at 6.9 GHz, using the measured data to demonstrate the models validity and applicability in real-world scenarios. 
Validation with measured data shows that the proposed model accurately describes the characteristics of the RIS cascaded channel in terms of delay, angle, and power in complex multipath environments, providing important references for the design and deployment of RIS systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.07356v1-abstract-full').style.display = 'none'; document.getElementById('2412.07356v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09936">arXiv:2411.09936</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.09936">pdf</a>, <a href="https://arxiv.org/format/2411.09936">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> A Multi-Scale Spatial-Temporal Network for Wireless Video Transmission </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zhou%2C+X">Xinyi Zhou</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+D">Danlan Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Qi%2C+Z">Zhixin Qi</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+L">Liang Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Ting Jiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.09936v1-abstract-short" style="display: inline;"> Deep 
joint source-channel coding (DeepJSCC) has shown promise in wireless transmission of text, speech, and images within the realm of semantic communication. However, wireless video transmission presents greater challenges due to the difficulty of extracting and compactly representing both spatial and temporal features, as well as its significant bandwidth and computational resource requirements.&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09936v1-abstract-full').style.display = 'inline'; document.getElementById('2411.09936v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.09936v1-abstract-full" style="display: none;"> Deep joint source-channel coding (DeepJSCC) has shown promise in wireless transmission of text, speech, and images within the realm of semantic communication. However, wireless video transmission presents greater challenges due to the difficulty of extracting and compactly representing both spatial and temporal features, as well as its significant bandwidth and computational resource requirements. In response, we propose a novel video DeepJSCC (VDJSCC) approach to enable end-to-end video transmission over a wireless channel. Our approach involves the design of a multi-scale vision Transformer encoder and decoder to effectively capture spatial-temporal representations over long-term frames. Additionally, we propose a dynamic token selection module to mask less semantically important tokens from spatial or temporal dimensions, allowing for content-adaptive variable-length video coding by adjusting the token keep ratio. Experimental results demonstrate the effectiveness of our VDJSCC approach compared to digital schemes that use separate source and channel codes, as well as other DeepJSCC schemes, in terms of reconstruction quality and bandwidth reduction. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09936v1-abstract-full').style.display = 'none'; document.getElementById('2411.09936v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">2024 IEEE Global Communications Conference (GLOBECOM)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.16955">arXiv:2410.16955</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.16955">pdf</a>, <a href="https://arxiv.org/format/2410.16955">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> PGCS: Physical Law embedded Generative Cloud Synthesis in Remote Sensing Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Xu%2C+L">Liying Xu</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+H">Huifang Li</a>, <a href="/search/eess?searchtype=author&amp;query=Shen%2C+H">Huanfeng Shen</a>, <a href="/search/eess?searchtype=author&amp;query=Lei%2C+M">Mingyang Lei</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Tao Jiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" 
id="2410.16955v1-abstract-short" style="display: inline;"> Data quantity and quality are both critical for information extraction and analyzation in remote sensing. However, the current remote sensing datasets often fail to meet these two requirements, for which cloud is a primary factor degrading the data quantity and quality. This limitation affects the precision of results in remote sensing application, particularly those derived from data-driven techn&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16955v1-abstract-full').style.display = 'inline'; document.getElementById('2410.16955v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.16955v1-abstract-full" style="display: none;"> Data quantity and quality are both critical for information extraction and analyzation in remote sensing. However, the current remote sensing datasets often fail to meet these two requirements, for which cloud is a primary factor degrading the data quantity and quality. This limitation affects the precision of results in remote sensing application, particularly those derived from data-driven techniques. In this paper, a physical law embedded generative cloud synthesis method (PGCS) is proposed to generate diverse realistic cloud images to enhance real data and promote the development of algorithms for subsequent tasks, such as cloud correction, cloud detection, and data augmentation for classification, recognition, and segmentation. The PGCS method involves two key phases: spatial synthesis and spectral synthesis. In the spatial synthesis phase, a style-based generative adversarial network is utilized to simulate the spatial characteristics, generating an infinite number of single-channel clouds. 
In the spectral synthesis phase, the atmospheric scattering law is embedded through a local statistics and global fitting method, converting the single-channel clouds into multi-spectral clouds. The experimental results demonstrate that PGCS achieves a high accuracy in both phases and performs better than three other existing cloud synthesis methods. Two cloud correction methods are developed from PGCS and exhibits a superior performance compared to state-of-the-art methods in the cloud correction task. Furthermore, the application of PGCS with data from various sensors was investigated and successfully extended. Code will be provided at https://github.com/Liying-Xu/PGCS. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16955v1-abstract-full').style.display = 'none'; document.getElementById('2410.16955v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">20 pages, 16 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.19331">arXiv:2409.19331</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.19331">pdf</a>, <a href="https://arxiv.org/format/2409.19331">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Wireless Environment Information Sensing, Feature, Semantic, and Knowledge: Four Steps Towards 6G AI-Enabled Air Interface </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+J">Jianhua Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Cai%2C+Y">Yichen Cai</a>, <a href="/search/eess?searchtype=author&amp;query=Yu%2C+L">Li Yu</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+Z">Zhen Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+Y">Yuxiang Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+J">Jialin Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Tao Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Xia%2C+L">Liang Xia</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+P">Ping Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.19331v1-abstract-short" style="display: inline;"> The air interface technology plays a crucial role in optimizing the communication quality for users. 
To address the challenges brought by the radio channel variations to air interface design, this article proposes a framework of wireless environment information-aided 6G AI-enabled air interface (WEI-6G AI$^{2}$), which actively acquires real-time environment details to facilitate channel fading pr&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.19331v1-abstract-full').style.display = 'inline'; document.getElementById('2409.19331v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.19331v1-abstract-full" style="display: none;"> The air interface technology plays a crucial role in optimizing the communication quality for users. To address the challenges brought by the radio channel variations to air interface design, this article proposes a framework of wireless environment information-aided 6G AI-enabled air interface (WEI-6G AI$^{2}$), which actively acquires real-time environment details to facilitate channel fading prediction and communication technology optimization. Specifically, we first outline the role of WEI in supporting the 6G AI$^{2}$ in scenario adaptability, real-time inference, and proactive action. Then, WEI is delineated into four progressive steps: raw sensing data, features obtained by data dimensionality reduction, semantics tailored to tasks, and knowledge that quantifies the environmental impact on the channel. To validate the availability and compare the effect of different types of WEI, a path loss prediction use case is designed. The results demonstrate that leveraging environment knowledge requires only 2.2 ms of model inference time, which can effectively support real-time design for future 6G AI$^{2}$. Additionally, WEI can reduce the pilot overhead by 25\%. Finally, several open issues are pointed out, including multi-modal sensing data synchronization and information extraction method construction. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.19331v1-abstract-full').style.display = 'none'; document.getElementById('2409.19331v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.14441">arXiv:2409.14441</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.14441">pdf</a>, <a href="https://arxiv.org/format/2409.14441">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> BUPTCMCC-6G-CMG+: A GBSM-Based ISAC Standard Channel Model Generator </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zhao%2C+C">Changsheng Zhao</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+J">Jianhua Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+Y">Yuxiang Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Tian%2C+L">Lei Tian</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+H">Heng Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+H">Hanyuan Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+Y">Yameng Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+W">Wenjun Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Tao Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+G">Guangyi Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span 
class="abstract-short has-text-grey-dark mathjax" id="2409.14441v4-abstract-short" style="display: inline;"> Integrated sensing and communication (ISAC) has been recognized as the key technology in the vision of the sixth generation (6G) era. With the emergence of new concepts in mobile communications, the channel model is the prerequisite for system design and performance evaluation. Currently, 3GPP Release 19 is advancing the standardization of ISAC channel models. Nevertheless, a unified modeling fram&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.14441v4-abstract-full').style.display = 'inline'; document.getElementById('2409.14441v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.14441v4-abstract-full" style="display: none;"> Integrated sensing and communication (ISAC) has been recognized as the key technology in the vision of the sixth generation (6G) era. With the emergence of new concepts in mobile communications, the channel model is the prerequisite for system design and performance evaluation. Currently, 3GPP Release 19 is advancing the standardization of ISAC channel models. Nevertheless, a unified modeling framework has yet to be established. This paper provides a simulation diagram of ISAC channel modeling extended based on the Geometry-Based Stochastic Model (GBSM), compatible with existing 5G channel models and the latest progress in the 3rd Generation Partnership Project (3GPP) standardization. We first introduce the progress of the ISAC channel model standardization in general. Then, a concatenated channel modeling approach is presented considering the team&#39;s standardization proposals, which is implemented on the BUPTCMCC-6G-CMG+ channel model generator. We validated the model in cumulative probability density function (CDF) in statistical extension of angle and delay, and radar cross section (RCS). 
Simulation results show that the proposed model can realistically characterize the feature of channel concatenation and RCS within the ISAC channel. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.14441v4-abstract-full').style.display = 'none'; document.getElementById('2409.14441v4-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages, 7 figures, 5 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.00066">arXiv:2409.00066</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.00066">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Optical Semantic Communication through Multimode Fiber: From Symbol Transmission to Sentiment Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Gao%2C+Z">Zheng Gao</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Ting Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+M">Mingming Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+H">Hao Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Tang%2C+M">Ming Tang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span 
class="abstract-short has-text-grey-dark mathjax" id="2409.00066v1-abstract-short" style="display: inline;"> We propose and validate a novel optical semantic transmission scheme using multimode fiber (MMF). By leveraging the frequency sensitivity of intermodal dispersion in MMFs, we achieve high-dimensional semantic encoding and decoding in the frequency domain. Our system maps symbols to 128 distinct frequencies spaced at 600 kHz intervals, demonstrating a seven-fold increase in capacity compared to con&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.00066v1-abstract-full').style.display = 'inline'; document.getElementById('2409.00066v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.00066v1-abstract-full" style="display: none;"> We propose and validate a novel optical semantic transmission scheme using multimode fiber (MMF). By leveraging the frequency sensitivity of intermodal dispersion in MMFs, we achieve high-dimensional semantic encoding and decoding in the frequency domain. Our system maps symbols to 128 distinct frequencies spaced at 600 kHz intervals, demonstrating a seven-fold increase in capacity compared to conventional communication encoding. We further enhance spectral efficiency by implementing 4-level pulse amplitude modulation (PAM-4), achieving 9.12 bits/s/Hz without decoding errors. Additionally, we explore the application of this system for sentiment analysis using the IMDb movie review dataset. By encoding semantically similar symbols to adjacent frequencies, the system&#39;s noise tolerance is effectively improved, facilitating accurate sentiment analysis. This work highlights the potential of MMF-based semantic communication to enhance both capacity and robustness in optical communication systems, offering promising applications in bandwidth-constrained and noisy environments. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.00066v1-abstract-full').style.display = 'none'; document.getElementById('2409.00066v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.14493">arXiv:2408.14493</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.14493">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1049/cit2.12369">10.1049/cit2.12369 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Extraction of Typical Operating Scenarios of New Power System Based on Deep Time Series Aggregation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Qu%2C+Z">Zhaoyang Qu</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+Z">Zhenming Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Qu%2C+N">Nan Qu</a>, <a href="/search/eess?searchtype=author&amp;query=Zhou%2C+Y">Yuguang Zhou</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+Y">Yang Li</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Tao Jiang</a>, <a 
href="/search/eess?searchtype=author&amp;query=Li%2C+M">Min Li</a>, <a href="/search/eess?searchtype=author&amp;query=Long%2C+C">Chao Long</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.14493v1-abstract-short" style="display: inline;"> Extracting typical operational scenarios is essential for making flexible decisions in the dispatch of a new power system. This study proposed a novel deep time series aggregation scheme (DTSAs) to generate typical operational scenarios, considering the large amount of historical operational snapshot data. Specifically, DTSAs analyze the intrinsic mechanisms of different scheduling operational sce&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.14493v1-abstract-full').style.display = 'inline'; document.getElementById('2408.14493v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.14493v1-abstract-full" style="display: none;"> Extracting typical operational scenarios is essential for making flexible decisions in the dispatch of a new power system. This study proposed a novel deep time series aggregation scheme (DTSAs) to generate typical operational scenarios, considering the large amount of historical operational snapshot data. Specifically, DTSAs analyze the intrinsic mechanisms of different scheduling operational scenario switching to mathematically represent typical operational scenarios. A gramian angular summation field (GASF) based operational scenario image encoder was designed to convert operational scenario sequences into high-dimensional spaces. This enables DTSAs to fully capture the spatiotemporal characteristics of new power systems using deep feature iterative aggregation models. 
The encoder also facilitates the generation of typical operational scenarios that conform to historical data distributions while ensuring the integrity of grid operational snapshots. Case studies demonstrate that the proposed method extracted new fine-grained power system dispatch schemes and outperformed the latest high-dimensional feature screening methods. In addition, experiments with different new energy access ratios were conducted to verify the robustness of the proposed method. DTSAs enables dispatchers to master the operation experience of the power system in advance, and actively respond to the dynamic changes of the operation scenarios under the high access rate of new energy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.14493v1-abstract-full').style.display = 'none'; document.getElementById('2408.14493v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by CAAI Transactions on Intelligence Technology</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.11329">arXiv:2408.11329</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.11329">pdf</a>, <a href="https://arxiv.org/ps/2408.11329">ps</a>, <a href="https://arxiv.org/format/2408.11329">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Full-Duplex ISAC-Enabled D2D Underlaid Cellular Networks: Joint Transceiver Beamforming and Power Allocation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Tao Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Jin%2C+M">Ming Jin</a>, <a href="/search/eess?searchtype=author&amp;query=Guo%2C+Q">Qinghua Guo</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+Y">Yinhong Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+Y">Yaming Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.11329v2-abstract-short" style="display: inline;"> Integrating device-to-device (D2D) communication into cellular networks can significantly reduce the transmission burden on base stations (BSs). Besides, integrated sensing and communication (ISAC) is envisioned as a key feature in future wireless networks. 
In this work, we consider a full-duplex ISAC-based D2D underlaid system, and propose a joint beamforming and power allocation scheme to impro&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.11329v2-abstract-full').style.display = 'inline'; document.getElementById('2408.11329v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.11329v2-abstract-full" style="display: none;"> Integrating device-to-device (D2D) communication into cellular networks can significantly reduce the transmission burden on base stations (BSs). Besides, integrated sensing and communication (ISAC) is envisioned as a key feature in future wireless networks. In this work, we consider a full-duplex ISAC-based D2D underlaid system, and propose a joint beamforming and power allocation scheme to improve the performance of the coexisting ISAC and D2D networks. To enhance spectral efficiency, a sum rate maximization problem is formulated for the full-duplex ISAC-based D2D underlaid system, which is non-convex. To solve the non-convex optimization problem, we propose a successive convex approximation (SCA)-based iterative algorithm and prove its convergence. Numerical results are provided to validate the effectiveness of the proposed scheme with the iterative algorithm, demonstrating that the proposed scheme outperforms state-of-the-art ones in both communication and sensing performance. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.11329v2-abstract-full').style.display = 'none'; document.getElementById('2408.11329v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This work has been submitted to IEEE Transactions on Wireless Communications on 7 June,2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.04158">arXiv:2408.04158</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.04158">pdf</a>, <a href="https://arxiv.org/format/2408.04158">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Efficient Single Image Super-Resolution with Entropy Attention and Receptive Field Augmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zhao%2C+X">Xiaole Zhao</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+L">Linze Li</a>, <a href="/search/eess?searchtype=author&amp;query=Xie%2C+C">Chengxing Xie</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+X">Xiaoming Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Ting Jiang</a>, <a 
href="/search/eess?searchtype=author&amp;query=Lin%2C+W">Wenjie Lin</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+S">Shuaicheng Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+T">Tianrui Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.04158v1-abstract-short" style="display: inline;"> Transformer-based deep models for single image super-resolution (SISR) have greatly improved the performance of lightweight SISR tasks in recent years. However, they often suffer from heavy computational burden and slow inference due to the complex calculation of multi-head self-attention (MSA), seriously hindering their practical application and deployment. In this work, we present an efficient S&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.04158v1-abstract-full').style.display = 'inline'; document.getElementById('2408.04158v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.04158v1-abstract-full" style="display: none;"> Transformer-based deep models for single image super-resolution (SISR) have greatly improved the performance of lightweight SISR tasks in recent years. However, they often suffer from heavy computational burden and slow inference due to the complex calculation of multi-head self-attention (MSA), seriously hindering their practical application and deployment. In this work, we present an efficient SR model to mitigate the dilemma between model efficiency and SR performance, which is dubbed Entropy Attention and Receptive Field Augmentation network (EARFA), and composed of a novel entropy attention (EA) and a shifting large kernel attention (SLKA). 
From the perspective of information theory, EA increases the entropy of intermediate features conditioned on a Gaussian distribution, providing more informative input for subsequent reasoning. On the other hand, SLKA extends the receptive field of SR models with the assistance of channel shifting, which also favors to boost the diversity of hierarchical features. Since the implementation of EA and SLKA does not involve complex computations (such as extensive matrix multiplications), the proposed method can achieve faster nonlinear inference than Transformer-based SR models while maintaining better SR performance. Extensive experiments show that the proposed model can significantly reduce the delay of model inference while achieving the SR performance comparable with other advanced models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.04158v1-abstract-full').style.display = 'none'; document.getElementById('2408.04158v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ACM MM 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.17286">arXiv:2406.17286</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.17286">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Prioritized experience replay-based DDQN for Unmanned Vehicle Path Planning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Lipeng%2C+L">Liu Lipeng</a>, <a href="/search/eess?searchtype=author&amp;query=Xu%2C+L">Letian Xu</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+J">Jiabei Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Zhao%2C+H">Haopeng Zhao</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Tongzhou Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Zheng%2C+T">Tianyao Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.17286v1-abstract-short" style="display: inline;"> Path planning module is a key module for autonomous vehicle navigation, which directly affects its operating efficiency and safety. In complex environments with many obstacles, traditional planning algorithms often cannot meet the needs of intelligence, which may lead to problems such as dead zones in unmanned vehicles. 
This paper proposes a path planning algorithm based on DDQN and combines it wi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.17286v1-abstract-full').style.display = 'inline'; document.getElementById('2406.17286v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.17286v1-abstract-full" style="display: none;"> Path planning module is a key module for autonomous vehicle navigation, which directly affects its operating efficiency and safety. In complex environments with many obstacles, traditional planning algorithms often cannot meet the needs of intelligence, which may lead to problems such as dead zones in unmanned vehicles. This paper proposes a path planning algorithm based on DDQN and combines it with the prioritized experience replay method to solve the problem that traditional path planning algorithms often fall into dead zones. A series of simulation experiment results prove that the path planning algorithm based on DDQN is significantly better than other methods in terms of speed and accuracy, especially the ability to break through dead zones in extreme environments. Research shows that the path planning algorithm based on DDQN performs well in terms of path quality and safety. These research results provide an important reference for the research on automatic navigation of autonomous vehicles. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.17286v1-abstract-full').style.display = 'none'; document.getElementById('2406.17286v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">4 pages, 6 figures, 2024 5th International Conference on Information Science, Parallel and Distributed Systems</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.00690">arXiv:2406.00690</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.00690">pdf</a>, <a href="https://arxiv.org/format/2406.00690">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Electromagnetic Wave Property Inspired Radio Environment Knowledge Construction and AI-based Verification for 6G Digital Twin Channel </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Wang%2C+J">Jialin Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+J">Jianhua Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Sun%2C+Y">Yutong Sun</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+Y">Yuxiang Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Tao Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Xia%2C+L">Liang Xia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.00690v1-abstract-short" style="display: inline;"> As the underlying foundation of a digital twin network (DTN), a digital twin channel (DTC) can accurately depict the process of radio propagation in the air interface to support the DTN-based 6G wireless network. 
Since radio propagation is affected by the environment, constructing the relationship between the environment and radio wave propagation is the key to improving the accuracy of DTC, and t&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.00690v1-abstract-full').style.display = 'inline'; document.getElementById('2406.00690v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.00690v1-abstract-full" style="display: none;"> As the underlying foundation of a digital twin network (DTN), a digital twin channel (DTC) can accurately depict the process of radio propagation in the air interface to support the DTN-based 6G wireless network. Since radio propagation is affected by the environment, constructing the relationship between the environment and radio wave propagation is the key to improving the accuracy of DTC, and the construction method based on artificial intelligence (AI) is the most concentrated. However, in the existing methods, the environment information input into the neural network (NN) has many dimensions, and the correlation between the environment and the channel relationship is unclear, resulting in a highly complex relationship construction process. To solve this issue, in this paper, we propose a construction method of radio environment knowledge (REK) inspired by the electromagnetic wave property to quantify the contribution of radio propagation. Specifically, a range selection scheme for effective environment information based on random geometry is proposed to reduce the redundancy of environment information. We quantify the contribution of radio propagation reflection, diffraction and scatterer blockage using environment information and propose a flow chart of REK construction to replace the feature extraction process partially based on NN. 
To validate REK&#39;s effectiveness, we conduct a path loss prediction task based on a lightweight convolutional neural network (CNN) employing a simple two-layer convolutional structure. The results show that the accuracy of the range selection method reaches 90\%; the constructed REK maintains the prediction error of 0.3 and only needs 0.04 seconds of testing time, effectively reducing the network complexity. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.00690v1-abstract-full').style.display = 'none'; document.getElementById('2406.00690v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.03129">arXiv:2405.03129</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.03129">pdf</a>, <a href="https://arxiv.org/format/2405.03129">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Active Sensing for Multiuser Beam Tracking with Reconfigurable Intelligent Surface </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Han%2C+H">Han Han</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Tao Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Yu%2C+W">Wei Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark 
mathjax" id="2405.03129v3-abstract-short" style="display: inline;"> This paper studies a beam tracking problem in which an access point (AP), in collaboration with a reconfigurable intelligent surface (RIS), dynamically adjusts its downlink beamformers and the reflection pattern at the RIS in order to maintain reliable communications with multiple mobile user equipments (UEs). Specifically, the mobile UEs send uplink pilots to the AP periodically during the channe&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.03129v3-abstract-full').style.display = 'inline'; document.getElementById('2405.03129v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.03129v3-abstract-full" style="display: none;"> This paper studies a beam tracking problem in which an access point (AP), in collaboration with a reconfigurable intelligent surface (RIS), dynamically adjusts its downlink beamformers and the reflection pattern at the RIS in order to maintain reliable communications with multiple mobile user equipments (UEs). Specifically, the mobile UEs send uplink pilots to the AP periodically during the channel sensing intervals, the AP then adaptively configures the beamformers and the RIS reflection coefficients for subsequent data transmission based on the received pilots. This is an active sensing problem, because channel sensing involves configuring the RIS coefficients during the pilot stage and the optimal sensing strategy should exploit the trajectory of channel state information (CSI) from previously received pilots. Analytical solution to such an active sensing problem is very challenging. In this paper, we propose a deep learning framework utilizing a recurrent neural network (RNN) to automatically summarize the time-varying CSI obtained from the periodically received pilots into state vectors. 
These state vectors are then mapped to the AP beamformers and RIS reflection coefficients for subsequent downlink data transmissions, as well as the RIS reflection coefficients for the next round of uplink channel sensing. The mappings from the state vectors to the downlink beamformers and the RIS reflection coefficients for both channel sensing and downlink data transmission are performed using graph neural networks (GNNs) to account for the interference among the UEs. Simulations demonstrate significant and interpretable performance improvement of the proposed approach over the existing data-driven methods with nonadaptive channel sensing schemes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.03129v3-abstract-full').style.display = 'none'; document.getElementById('2405.03129v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.13277">arXiv:2404.13277</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.13277">pdf</a>, <a href="https://arxiv.org/format/2404.13277">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Beyond Score Changes: Adversarial Attack on No-Reference Image Quality Assessment from Two Perspectives </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Yang%2C+C">Chenxi Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+Y">Yujia Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+D">Dingquan Li</a>, <a href="/search/eess?searchtype=author&amp;query=Zhong%2C+Y">Yan Zhong</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Tingting Jiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.13277v2-abstract-short" style="display: inline;"> Deep neural networks have demonstrated impressive success in No-Reference Image Quality Assessment (NR-IQA). However, recent research highlights the vulnerability of NR-IQA models to subtle adversarial perturbations, leading to inconsistencies between model predictions and subjective ratings. 
Current adversarial attacks, however, focus on perturbing predicted scores of individual images, neglecti&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.13277v2-abstract-full').style.display = 'inline'; document.getElementById('2404.13277v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.13277v2-abstract-full" style="display: none;"> Deep neural networks have demonstrated impressive success in No-Reference Image Quality Assessment (NR-IQA). However, recent research highlights the vulnerability of NR-IQA models to subtle adversarial perturbations, leading to inconsistencies between model predictions and subjective ratings. Current adversarial attacks, however, focus on perturbing predicted scores of individual images, neglecting the crucial aspect of inter-score correlation relationships within an entire image set. Meanwhile, it is important to note that the correlation, like ranking correlation, plays a significant role in NR-IQA tasks. To comprehensively explore the robustness of NR-IQA models, we introduce a new framework of correlation-error-based attacks that perturb both the correlation within an image set and score changes on individual images. Our research primarily focuses on ranking-related correlation metrics like Spearman&#39;s Rank-Order Correlation Coefficient (SROCC) and prediction error-related metrics like Mean Squared Error (MSE). As an instantiation, we propose a practical two-stage SROCC-MSE-Attack (SMA) that initially optimizes target attack scores for the entire image set and then generates adversarial examples guided by these scores. Experimental results demonstrate that our SMA method not only significantly disrupts the SROCC to negative values but also maintains a considerable change in the scores of individual images. 
Meanwhile, it exhibits state-of-the-art performance across metrics with different categories. Our method provides a new perspective on the robustness of NR-IQA models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.13277v2-abstract-full').style.display = 'none'; document.getElementById('2404.13277v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to a conference</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.11397">arXiv:2403.11397</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.11397">pdf</a>, <a href="https://arxiv.org/format/2403.11397">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Defense Against Adversarial Attacks on No-Reference Image Quality Models with Gradient Norm Regularization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Liu%2C+Y">Yujia Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Yang%2C+C">Chenxi Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+D">Dingquan Li</a>, <a 
href="/search/eess?searchtype=author&amp;query=Ding%2C+J">Jianhao Ding</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Tingting Jiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.11397v1-abstract-short" style="display: inline;"> The task of No-Reference Image Quality Assessment (NR-IQA) is to estimate the quality score of an input image without additional information. NR-IQA models play a crucial role in the media industry, aiding in performance evaluation and optimization guidance. However, these models are found to be vulnerable to adversarial attacks, which introduce imperceptible perturbations to input images, resulti&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.11397v1-abstract-full').style.display = 'inline'; document.getElementById('2403.11397v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.11397v1-abstract-full" style="display: none;"> The task of No-Reference Image Quality Assessment (NR-IQA) is to estimate the quality score of an input image without additional information. NR-IQA models play a crucial role in the media industry, aiding in performance evaluation and optimization guidance. However, these models are found to be vulnerable to adversarial attacks, which introduce imperceptible perturbations to input images, resulting in significant changes in predicted scores. In this paper, we propose a defense method to improve the stability in predicted scores when attacked by small perturbations, thus enhancing the adversarial robustness of NR-IQA models. To be specific, we present theoretical evidence showing that the magnitude of score changes is related to the $\ell_1$ norm of the model&#39;s gradient with respect to the input image. 
Building upon this theoretical foundation, we propose a norm regularization training strategy aimed at reducing the $\ell_1$ norm of the gradient, thereby boosting the robustness of NR-IQA models. Experiments conducted on four NR-IQA baseline models demonstrate the effectiveness of our strategy in reducing score changes in the presence of adversarial attacks. To the best of our knowledge, this work marks the first attempt to defend against adversarial attacks on NR-IQA models. Our study offers valuable insights into the adversarial robustness of NR-IQA models and provides a foundation for future research in this area. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.11397v1-abstract-full').style.display = 'none'; document.getElementById('2403.11397v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">accepted by CVPR 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.09004">arXiv:2403.09004</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.09004">pdf</a>, <a href="https://arxiv.org/ps/2403.09004">ps</a>, <a href="https://arxiv.org/format/2403.09004">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Meta-Learning-Based Fronthaul Compression for Cloud Radio Access Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Qiao%2C+R">Ruihua Qiao</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Tao Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Yu%2C+W">Wei Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.09004v1-abstract-short" style="display: inline;"> This paper investigates the fronthaul compression problem in a user-centric cloud radio access network, in which single-antenna users are served by a central processor (CP) cooperatively via a cluster of remote radio heads (RRHs). 
To satisfy the fronthaul capacity constraint, this paper proposes a transform-compress-forward scheme, which consists of well-designed transformation matrices and unifor&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.09004v1-abstract-full').style.display = 'inline'; document.getElementById('2403.09004v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.09004v1-abstract-full" style="display: none;"> This paper investigates the fronthaul compression problem in a user-centric cloud radio access network, in which single-antenna users are served by a central processor (CP) cooperatively via a cluster of remote radio heads (RRHs). To satisfy the fronthaul capacity constraint, this paper proposes a transform-compress-forward scheme, which consists of well-designed transformation matrices and uniform quantizers. The transformation matrices perform dimension reduction in the uplink and dimension expansion in the downlink. To reduce the communication overhead for designing the transformation matrices, this paper further proposes a deep learning framework to first learn a suboptimal transformation matrix at each RRH based on the local channel state information (CSI), and then to refine it iteratively. To facilitate the refinement process, we propose an efficient signaling scheme that only requires the transmission of low-dimensional effective CSI and its gradient between the CP and RRH, and further, a meta-learning based gated recurrent unit network to reduce the number of signaling transmission rounds. For the sum-rate maximization problem, simulation results show that the proposed two-stage neural network can perform close to the fully cooperative global CSI based benchmark with significantly reduced communication overhead for both the uplink and the downlink. 
Moreover, using the first stage alone can already outperform the existing local CSI based benchmark. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.09004v1-abstract-full').style.display = 'none'; document.getElementById('2403.09004v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 Pages, 13 Figures; accepted in IEEE Transactions on Wireless Communications</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.00134">arXiv:2403.00134</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.00134">pdf</a>, <a href="https://arxiv.org/format/2403.00134">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Active Sensing for Reciprocal MIMO Channels </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Tao Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Yu%2C+W">Wei Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.00134v2-abstract-short" style="display: inline;"> This paper addresses the design of transmit precoder and receive combiner matrices to support $N_{\rm s}$ independent data 
streams over a time-division duplex (TDD) point-to-point massive multiple-input multiple-output (MIMO) channel with either a fully digital or a hybrid structure. The optimal precoder and combiner design amounts to finding the top-$N_{\rm s}$ singular vectors of the channel mat&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.00134v2-abstract-full').style.display = 'inline'; document.getElementById('2403.00134v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.00134v2-abstract-full" style="display: none;"> This paper addresses the design of transmit precoder and receive combiner matrices to support $N_{\rm s}$ independent data streams over a time-division duplex (TDD) point-to-point massive multiple-input multiple-output (MIMO) channel with either a fully digital or a hybrid structure. The optimal precoder and combiner design amounts to finding the top-$N_{\rm s}$ singular vectors of the channel matrix, but the explicit estimation of the entire high-dimensional channel would require significant pilot overhead. Alternatively, prior works suggest to find the precoding and combining matrices directly by exploiting channel reciprocity and by using the power iteration method, but its performance degrades in the low SNR regime. To tackle this challenging problem, this paper proposes a learning-based active sensing framework, where the transmitter and the receiver send pilots alternately using sensing beamformers that are actively designed as functions of previously received pilots. This is accomplished by using recurrent neural networks to summarize information from the historical observations into hidden state vectors, then using fully connected neural networks to learn the appropriate sensing beamformers in the next pilot stage and finally the transmit precoding and receive combiner matrices for data communications. 
Simulations demonstrate that the learning-based method outperforms existing approaches significantly and maintains superior performance even in the low SNR regime for both the fully digital and hybrid MIMO scenarios. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.00134v2-abstract-full').style.display = 'none'; document.getElementById('2403.00134v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper is accepted in IEEE Transactions on Signal Processing</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.16153">arXiv:2402.16153</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.16153">pdf</a>, <a href="https://arxiv.org/format/2402.16153">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title 
is-5 mathjax"> ChatMusician: Understanding and Generating Music Intrinsically with LLM </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Yuan%2C+R">Ruibin Yuan</a>, <a href="/search/eess?searchtype=author&amp;query=Lin%2C+H">Hanfeng Lin</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+Y">Yi Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Tian%2C+Z">Zeyue Tian</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+S">Shangda Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Shen%2C+T">Tianhao Shen</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+G">Ge Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+Y">Yuhang Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+C">Cong Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Zhou%2C+Z">Ziya Zhou</a>, <a href="/search/eess?searchtype=author&amp;query=Ma%2C+Z">Ziyang Ma</a>, <a href="/search/eess?searchtype=author&amp;query=Xue%2C+L">Liumeng Xue</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+Z">Ziyu Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+Q">Qin Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Zheng%2C+T">Tianyu Zheng</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+Y">Yizhi Li</a>, <a href="/search/eess?searchtype=author&amp;query=Ma%2C+Y">Yinghao Ma</a>, <a href="/search/eess?searchtype=author&amp;query=Liang%2C+Y">Yiming Liang</a>, <a href="/search/eess?searchtype=author&amp;query=Chi%2C+X">Xiaowei Chi</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+R">Ruibo Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+Z">Zili Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+P">Pengfei Li</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+J">Jingcheng Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Lin%2C+C">Chenghua Lin</a>, <a 
href="/search/eess?searchtype=author&amp;query=Liu%2C+Q">Qifeng Liu</a> , et al. (10 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.16153v1-abstract-short" style="display: inline;"> While Large Language Models (LLMs) demonstrate impressive capabilities in text generation, we find that their ability has yet to be generalized to music, humanity&#39;s creative language. We introduce ChatMusician, an open-source LLM that integrates intrinsic musical abilities. It is based on continual pre-training and finetuning LLaMA2 on a text-compatible music representation, ABC notation, and the&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.16153v1-abstract-full').style.display = 'inline'; document.getElementById('2402.16153v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.16153v1-abstract-full" style="display: none;"> While Large Language Models (LLMs) demonstrate impressive capabilities in text generation, we find that their ability has yet to be generalized to music, humanity&#39;s creative language. We introduce ChatMusician, an open-source LLM that integrates intrinsic musical abilities. It is based on continual pre-training and finetuning LLaMA2 on a text-compatible music representation, ABC notation, and the music is treated as a second language. ChatMusician can understand and generate music with a pure text tokenizer without any external multi-modal neural structures or tokenizers. Interestingly, endowing musical abilities does not harm language abilities, even achieving a slightly higher MMLU score. Our model is capable of composing well-structured, full-length music, conditioned on texts, chords, melodies, motifs, musical forms, etc, surpassing GPT-4 baseline. 
On our meticulously curated college-level music understanding benchmark, MusicTheoryBench, ChatMusician surpasses LLaMA2 and GPT-3.5 on zero-shot setting by a noticeable margin. Our work reveals that LLMs can be an excellent compressor for music, but there remains significant territory to be conquered. We release our 4B token music-language corpora MusicPile, the collected MusicTheoryBench, code, model and demo in GitHub. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.16153v1-abstract-full').style.display = 'none'; document.getElementById('2402.16153v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">GitHub: https://shanghaicannon.github.io/ChatMusician/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.11164">arXiv:2402.11164</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.11164">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> TinyLIC-High efficiency lossy image compression method </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Ma%2C+G">Gaocheng Ma</a>, <a href="/search/eess?searchtype=author&amp;query=Chai%2C+Y">Yinfeng Chai</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Tianhao Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Lu%2C+M">Ming Lu</a>, <a 
href="/search/eess?searchtype=author&amp;query=Chen%2C+T">Tong Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.11164v1-abstract-short" style="display: inline;"> Image compression has been the subject of extensive research for several decades, resulting in the development of well-known standards such as JPEG, JPEG2000, and H.264/AVC. However, recent advancements in deep learning have led to the emergence of learned image compression methods that offer significant improvements in coding efficiency compared to traditional codecs. These learned compression te&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.11164v1-abstract-full').style.display = 'inline'; document.getElementById('2402.11164v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.11164v1-abstract-full" style="display: none;"> Image compression has been the subject of extensive research for several decades, resulting in the development of well-known standards such as JPEG, JPEG2000, and H.264/AVC. However, recent advancements in deep learning have led to the emergence of learned image compression methods that offer significant improvements in coding efficiency compared to traditional codecs. 
These learned compression techniques have shown noticeable gains and even outperformed traditional schemes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.11164v1-abstract-full').style.display = 'none'; document.getElementById('2402.11164v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.15321">arXiv:2401.15321</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2401.15321">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.apenergy.2024.122736">10.1016/j.apenergy.2024.122736 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Localization of Dummy Data Injection Attacks in Power Systems Considering Incomplete Topological Information: A Spatio-Temporal Graph Wavelet Convolutional Neural Network Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Qu%2C+Z">Zhaoyang Qu</a>, <a href="/search/eess?searchtype=author&amp;query=Dong%2C+Y">Yunchang Dong</a>, <a 
href="/search/eess?searchtype=author&amp;query=Li%2C+Y">Yang Li</a>, <a href="/search/eess?searchtype=author&amp;query=Song%2C+S">Siqi Song</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Tao Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+M">Min Li</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+Q">Qiming Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+L">Lei Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Bo%2C+X">Xiaoyong Bo</a>, <a href="/search/eess?searchtype=author&amp;query=Zang%2C+J">Jiye Zang</a>, <a href="/search/eess?searchtype=author&amp;query=Xu%2C+Q">Qi Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.15321v1-abstract-short" style="display: inline;"> The emergence of novel the dummy data injection attack (DDIA) poses a severe threat to the secure and stable operation of power systems. These attacks are particularly perilous due to the minimal Euclidean spatial separation between the injected malicious data and legitimate data, rendering their precise detection challenging using conventional distance-based methods. Furthermore, existing researc&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.15321v1-abstract-full').style.display = 'inline'; document.getElementById('2401.15321v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.15321v1-abstract-full" style="display: none;"> The emergence of novel the dummy data injection attack (DDIA) poses a severe threat to the secure and stable operation of power systems. 
These attacks are particularly perilous due to the minimal Euclidean spatial separation between the injected malicious data and legitimate data, rendering their precise detection challenging using conventional distance-based methods. Furthermore, existing research predominantly focuses on various machine learning techniques, often analyzing the temporal data sequences post-attack or relying solely on Euclidean spatial characteristics. Unfortunately, this approach tends to overlook the inherent topological correlations within the non-Euclidean spatial attributes of power grid data, consequently leading to diminished accuracy in attack localization. To address this issue, this study takes a comprehensive approach. Initially, it examines the underlying principles of these new DDIAs on power systems. Here, an intricate mathematical model of the DDIA is designed, accounting for incomplete topological knowledge and alternating current (AC) state estimation from an attacker&#39;s perspective. Subsequently, by integrating a priori knowledge of grid topology and considering the temporal correlations within measurement data and the topology-dependent attributes of the power grid, this study introduces temporal and spatial attention matrices. These matrices adaptively capture the spatio-temporal correlations within the attacks. Leveraging gated stacked causal convolution and graph wavelet sparse convolution, the study jointly extracts spatio-temporal DDIA features. Finally, the research proposes a DDIA localization method based on spatio-temporal graph neural networks. The accuracy and effectiveness of the DDIA model are rigorously demonstrated through comprehensive analytical cases. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.15321v1-abstract-full').style.display = 'none'; document.getElementById('2401.15321v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by Applied Energy</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Applied Energy 360 (2024) 122736 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.13276">arXiv:2401.13276</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2401.13276">pdf</a>, <a href="https://arxiv.org/format/2401.13276">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> SCNet: Sparse Compression Network for Music Source Separation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Tong%2C+W">Weinan Tong</a>, <a href="/search/eess?searchtype=author&amp;query=Zhu%2C+J">Jiaxu Zhu</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+J">Jun Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Kang%2C+S">Shiyin Kang</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Tao Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+Y">Yang Li</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+Z">Zhiyong Wu</a>, <a 
href="/search/eess?searchtype=author&amp;query=Meng%2C+H">Helen Meng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.13276v1-abstract-short" style="display: inline;"> Deep learning-based methods have made significant achievements in music source separation. However, obtaining good results while maintaining a low model complexity remains challenging in super wide-band music source separation. Previous works either overlook the differences in subbands or inadequately address the problem of information loss when generating subband features. In this paper, we propo&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.13276v1-abstract-full').style.display = 'inline'; document.getElementById('2401.13276v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.13276v1-abstract-full" style="display: none;"> Deep learning-based methods have made significant achievements in music source separation. However, obtaining good results while maintaining a low model complexity remains challenging in super wide-band music source separation. Previous works either overlook the differences in subbands or inadequately address the problem of information loss when generating subband features. In this paper, we propose SCNet, a novel frequency-domain network to explicitly split the spectrogram of the mixture into several subbands and introduce a sparsity-based encoder to model different frequency bands. We use a higher compression ratio on subbands with less information to improve the information density and focus on modeling subbands with more information. In this way, the separation performance can be significantly improved using lower computational consumption. 
Experiment results show that the proposed model achieves a signal to distortion ratio (SDR) of 9.0 dB on the MUSDB18-HQ dataset without using extra data, which outperforms state-of-the-art methods. Specifically, SCNet&#39;s CPU inference time is only 48% of HT Demucs, one of the previous state-of-the-art models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.13276v1-abstract-full').style.display = 'none'; document.getElementById('2401.13276v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ICASSP 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.05217">arXiv:2401.05217</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2401.05217">pdf</a>, <a href="https://arxiv.org/format/2401.05217">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Exploring Vulnerabilities of No-Reference Image Quality Assessment Models: A Query-Based Black-Box Method </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Yang%2C+C">Chenxi Yang</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+Y">Yujia Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+D">Dingquan Li</a>, <a 
href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Tingting Jiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.05217v3-abstract-short" style="display: inline;"> No-Reference Image Quality Assessment (NR-IQA) aims to predict image quality scores consistent with human perception without relying on pristine reference images, serving as a crucial component in various visual tasks. Ensuring the robustness of NR-IQA methods is vital for reliable comparisons of different image processing techniques and consistent user experiences in recommendations. The attack m&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.05217v3-abstract-full').style.display = 'inline'; document.getElementById('2401.05217v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.05217v3-abstract-full" style="display: none;"> No-Reference Image Quality Assessment (NR-IQA) aims to predict image quality scores consistent with human perception without relying on pristine reference images, serving as a crucial component in various visual tasks. Ensuring the robustness of NR-IQA methods is vital for reliable comparisons of different image processing techniques and consistent user experiences in recommendations. The attack methods for NR-IQA provide a powerful instrument to test the robustness of NR-IQA. However, current attack methods of NR-IQA heavily rely on the gradient of the NR-IQA model, leading to limitations when the gradient information is unavailable. In this paper, we present a pioneering query-based black box attack against NR-IQA methods. We propose the concept of score boundary and leverage an adaptive iterative approach with multiple score boundaries. 
Meanwhile, the initial attack directions are also designed to leverage the characteristics of the Human Visual System (HVS). Experiments show our method outperforms all compared state-of-the-art attack methods and is far ahead of previous black-box methods. The effective NR-IQA model DBCNN suffers a Spearman&#39;s rank-order correlation coefficient (SROCC) decline of 0.6381 attacked by our method, revealing the vulnerability of NR-IQA models to black-box attacks. The proposed attack method also provides a potent tool for further exploration into NR-IQA robustness. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.05217v3-abstract-full').style.display = 'none'; document.getElementById('2401.05217v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.09002">arXiv:2312.09002</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2312.09002">pdf</a>, <a href="https://arxiv.org/format/2312.09002">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Localization with Reconfigurable Intelligent Surface: An Active Sensing Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+Z">Zhongze Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Tao Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Yu%2C+W">Wei Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.09002v2-abstract-short" style="display: inline;"> This paper addresses an uplink localization problem in which a base station (BS) aims to locate a remote user with the help of reconfigurable intelligent surfaces (RISs). 
We propose a strategy in which the user transmits pilots sequentially and the BS adaptively adjusts the sensing vectors, including the BS beamforming vector and multiple RIS reflection coefficients based on the observations alrea&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.09002v2-abstract-full').style.display = 'inline'; document.getElementById('2312.09002v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.09002v2-abstract-full" style="display: none;"> This paper addresses an uplink localization problem in which a base station (BS) aims to locate a remote user with the help of reconfigurable intelligent surfaces (RISs). We propose a strategy in which the user transmits pilots sequentially and the BS adaptively adjusts the sensing vectors, including the BS beamforming vector and multiple RIS reflection coefficients based on the observations already made, to eventually produce an estimated user position. This is a challenging active sensing problem for which finding an optimal solution involves searching through a complicated functional space whose dimension increases with the number of measurements. We show that the long short-term memory (LSTM) network can be used to exploit the latent temporal correlation between measurements to automatically construct scalable state vectors. Subsequently, the state vector is mapped to the sensing vectors for the next time frame via a deep neural network (DNN). A final DNN is used to map the state vector to the estimated user position. Numerical result illustrates the advantage of the active sensing design as compared to non-active sensing methods. The proposed solution produces interpretable results and is generalizable in the number of sensing stages. Remarkably, we show that a network with one BS and multiple RISs can outperform a comparable setting with multiple BSs. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.09002v2-abstract-full').style.display = 'none'; document.getElementById('2312.09002v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted in IEEE Transactions on Wireless Communications. This is an extended version of the previous arXiv paper arXiv:2310.13160</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.12273">arXiv:2311.12273</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2311.12273">pdf</a>, <a href="https://arxiv.org/format/2311.12273">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> How AI-driven Digital Twins Can Empower Mobile Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Li%2C+T">Tong Li</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+F">Fenyu Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Yu%2C+Q">Qiaohong Yu</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+W">Wenzhen Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Tao Jiang</a>, <a 
href="/search/eess?searchtype=author&amp;query=Jin%2C+D">Depeng Jin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.12273v1-abstract-short" style="display: inline;"> The growing complexity of next-generation networks exacerbates the modeling and algorithmic flaws of conventional network optimization methodology. In this paper, we propose a mobile network digital twin (MNDT) architecture for 6G networks. To address the modeling and algorithmic shortcomings, the MNDT uses a simulation-optimization structure. The feedback from the network simulation engine, which&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.12273v1-abstract-full').style.display = 'inline'; document.getElementById('2311.12273v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.12273v1-abstract-full" style="display: none;"> The growing complexity of next-generation networks exacerbates the modeling and algorithmic flaws of conventional network optimization methodology. In this paper, we propose a mobile network digital twin (MNDT) architecture for 6G networks. To address the modeling and algorithmic shortcomings, the MNDT uses a simulation-optimization structure. The feedback from the network simulation engine, which serves as validation for the optimizer&#39;s decision outcomes, is used explicitly to train artificial intelligence (AI) empowered optimizers iteratively. In practice, we develop a network digital twin prototype system leveraging data-driven technology to accurately model the behaviors of mobile network elements (e.g., mobile users and base stations), wireless environments, and network performance. 
An AI-powered network optimizer has been developed based on the deployed MNDT prototype system for providing reliable and optimized network configurations. The results of the experiments demonstrate that the proposed MNDT infrastructure can provide practical network optimization solutions while adapting to the more complex environment. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.12273v1-abstract-full').style.display = 'none'; document.getElementById('2311.12273v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.16765">arXiv:2310.16765</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2310.16765">pdf</a>, <a href="https://arxiv.org/format/2310.16765">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> How to Extend 3D GBSM to Integrated Sensing and Communication Channel with Sharing Feature? 
</p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Liu%2C+Y">Yameng Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+J">Jianhua Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+Y">Yuxiang Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Gong%2C+H">Huiwen Gong</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Tao Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+G">Guangyi Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.16765v1-abstract-short" style="display: inline;"> Integrated Sensing and Communication (ISAC) is a promising technology in 6G systems. The existing 3D Geometry-Based Stochastic Model (GBSM), as standardized for 5G systems, addresses solely communication channels and lacks consideration of the integration with sensing channel. Therefore, this letter extends 3D GBSM to support ISAC research, with a particular focus on capturing the sharing feature&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.16765v1-abstract-full').style.display = 'inline'; document.getElementById('2310.16765v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.16765v1-abstract-full" style="display: none;"> Integrated Sensing and Communication (ISAC) is a promising technology in 6G systems. The existing 3D Geometry-Based Stochastic Model (GBSM), as standardized for 5G systems, addresses solely communication channels and lacks consideration of the integration with sensing channel. 
Therefore, this letter extends 3D GBSM to support ISAC research, with a particular focus on capturing the sharing feature of both channels, including shared scatterers, clusters, paths, and similar propagation parameters, which have been experimentally verified in the literature. The proposed approach can be summarized as follows: Firstly, an ISAC channel model is proposed, where shared and non-shared components are superimposed for both communication and sensing. Secondly, sensing channel is characterized as a cascade of TX-target, radar cross section, and target-RX, with the introduction of a novel parameter S for shared target extraction. Finally, an ISAC channel implementation framework is proposed, allowing flexible configuration of sharing feature and the joint generation of communication and sensing channels. The proposed ISAC channel model can be compatible with the 3GPP standards and offers promising support for ISAC technology evaluation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.16765v1-abstract-full').style.display = 'none'; document.getElementById('2310.16765v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.13160">arXiv:2310.13160</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2310.13160">pdf</a>, <a href="https://arxiv.org/format/2310.13160">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Active Sensing for Localization with Reconfigurable Intelligent Surface </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+Z">Zhongze Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Tao Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Yu%2C+W">Wei Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.13160v1-abstract-short" style="display: inline;"> This paper addresses an uplink localization problem in which the base station (BS) aims to locate a remote user with the aid of reconfigurable intelligent surface (RIS). 
This paper proposes a strategy in which the user transmits pilots over multiple time frames, and the BS adaptively adjusts the RIS reflection coefficients based on the observations already received so far in order to produce an ac&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.13160v1-abstract-full').style.display = 'inline'; document.getElementById('2310.13160v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.13160v1-abstract-full" style="display: none;"> This paper addresses an uplink localization problem in which the base station (BS) aims to locate a remote user with the aid of reconfigurable intelligent surface (RIS). This paper proposes a strategy in which the user transmits pilots over multiple time frames, and the BS adaptively adjusts the RIS reflection coefficients based on the observations already received so far in order to produce an accurate estimate of the user location at the end. This is a challenging active sensing problem for which finding an optimal solution involves a search through a complicated functional space whose dimension increases with the number of measurements. In this paper, we show that the long short-term memory (LSTM) network can be used to exploit the latent temporal correlation between measurements to automatically construct scalable information vectors (called hidden state) based on the measurements. Subsequently, the state vector can be mapped to the RIS configuration for the next time frame in a codebook-free fashion via a deep neural network (DNN). After all the measurements have been received, a final DNN can be used to map the LSTM cell state to the estimated user equipment (UE) position. Numerical result shows that the proposed active RIS design results in lower localization error as compared to existing active and nonactive methods. 
The proposed solution produces interpretable results and is generalizable to early stopping in the sequence of sensing stages. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.13160v1-abstract-full').style.display = 'none'; document.getElementById('2310.13160v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted in IEEE International Conference on Communications (ICC) 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.11044">arXiv:2310.11044</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2310.11044">pdf</a>, <a href="https://arxiv.org/ps/2310.11044">ps</a>, <a href="https://arxiv.org/format/2310.11044">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> A Tutorial on Near-Field XL-MIMO Communications Towards 6G </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Lu%2C+H">Haiquan Lu</a>, <a href="/search/eess?searchtype=author&amp;query=Zeng%2C+Y">Yong Zeng</a>, <a href="/search/eess?searchtype=author&amp;query=You%2C+C">Changsheng You</a>, <a href="/search/eess?searchtype=author&amp;query=Han%2C+Y">Yu Han</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+J">Jiayi Zhang</a>, <a 
href="/search/eess?searchtype=author&amp;query=Wang%2C+Z">Zhe Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Dong%2C+Z">Zhenjun Dong</a>, <a href="/search/eess?searchtype=author&amp;query=Jin%2C+S">Shi Jin</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+C">Cheng-Xiang Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Tao Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=You%2C+X">Xiaohu You</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+R">Rui Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.11044v3-abstract-short" style="display: inline;"> Extremely large-scale multiple-input multiple-output (XL-MIMO) is a promising technology for the sixth-generation (6G) mobile communication networks. By significantly boosting the antenna number or size to at least an order of magnitude beyond current massive MIMO systems, XL-MIMO is expected to unprecedentedly enhance the spectral efficiency and spatial resolution for wireless communication. The&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.11044v3-abstract-full').style.display = 'inline'; document.getElementById('2310.11044v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.11044v3-abstract-full" style="display: none;"> Extremely large-scale multiple-input multiple-output (XL-MIMO) is a promising technology for the sixth-generation (6G) mobile communication networks. By significantly boosting the antenna number or size to at least an order of magnitude beyond current massive MIMO systems, XL-MIMO is expected to unprecedentedly enhance the spectral efficiency and spatial resolution for wireless communication. 
The evolution from massive MIMO to XL-MIMO is not simply an increase in the array size, but faces new design challenges, in terms of near-field channel modelling, performance analysis, channel estimation, and practical implementation. In this article, we give a comprehensive tutorial overview on near-field XL-MIMO communications, aiming to provide useful guidance for tackling the above challenges. First, the basic near-field modelling for XL-MIMO is established, by considering the new characteristics of non-uniform spherical wave (NUSW) and spatial non-stationarity. Next, based on the near-field modelling, the performance analysis of XL-MIMO is presented, including the near-field signal-to-noise ratio (SNR) scaling laws, beam focusing pattern, achievable rate, and degrees-of-freedom (DoF). Furthermore, various XL-MIMO design issues such as near-field beam codebook, beam training, channel estimation, and delay alignment modulation (DAM) transmission are elaborated. Finally, we point out promising directions to inspire future research on near-field XL-MIMO communications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.11044v3-abstract-full').style.display = 'none'; document.getElementById('2310.11044v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">42 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.11977">arXiv:2309.11977</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2309.11977">pdf</a>, <a href="https://arxiv.org/format/2309.11977">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Improving Language Model-Based Zero-Shot Text-to-Speech Synthesis with Multi-Scale Acoustic Prompts </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Lei%2C+S">Shun Lei</a>, <a href="/search/eess?searchtype=author&amp;query=Zhou%2C+Y">Yixuan Zhou</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+L">Liyang Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Luo%2C+D">Dan Luo</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+Z">Zhiyong Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+X">Xixin Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Kang%2C+S">Shiyin Kang</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Tao Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Zhou%2C+Y">Yahui Zhou</a>, <a href="/search/eess?searchtype=author&amp;query=Han%2C+Y">Yuxing Han</a>, <a href="/search/eess?searchtype=author&amp;query=Meng%2C+H">Helen Meng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.11977v3-abstract-short" style="display: inline;"> Zero-shot 
text-to-speech (TTS) synthesis aims to clone any unseen speaker&#39;s voice without adaptation parameters. By quantizing speech waveform into discrete acoustic tokens and modeling these tokens with the language model, recent language model-based TTS models show zero-shot speaker adaptation capabilities with only a 3-second acoustic prompt of an unseen speaker. However, they are limited by th&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.11977v3-abstract-full').style.display = 'inline'; document.getElementById('2309.11977v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.11977v3-abstract-full" style="display: none;"> Zero-shot text-to-speech (TTS) synthesis aims to clone any unseen speaker&#39;s voice without adaptation parameters. By quantizing speech waveform into discrete acoustic tokens and modeling these tokens with the language model, recent language model-based TTS models show zero-shot speaker adaptation capabilities with only a 3-second acoustic prompt of an unseen speaker. However, they are limited by the length of the acoustic prompt, which makes it difficult to clone personal speaking style. In this paper, we propose a novel zero-shot TTS model with the multi-scale acoustic prompts based on a neural codec language model VALL-E. A speaker-aware text encoder is proposed to learn the personal speaking style at the phoneme-level from the style prompt consisting of multiple sentences. Following that, a VALL-E based acoustic decoder is utilized to model the timbre from the timbre prompt at the frame-level and generate speech. The experimental results show that our proposed method outperforms baselines in terms of naturalness and speaker similarity, and can achieve better performance by scaling out to a longer style prompt. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.11977v3-abstract-full').style.display = 'none'; document.getElementById('2309.11977v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ICASSP 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.13575">arXiv:2308.13575</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2308.13575">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optics">physics.optics</span> </div> </div> <p class="title is-5 mathjax"> FrFT based estimation of linear and nonlinear impairments using Vision Transformer </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Ting Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Gao%2C+Z">Zheng Gao</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+Y">Yizhao Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Hu%2C+Z">Zihe Hu</a>, <a href="/search/eess?searchtype=author&amp;query=Tang%2C+M">Ming Tang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.13575v1-abstract-short" style="display: inline;"> To comprehensively assess optical fiber communication system conditions, it is essential to implement joint estimation of the following four critical impairments: nonlinear signal-to-noise ratio (SNRNL), optical signal-to-noise ratio (OSNR), chromatic dispersion (CD) and differential group delay (DGD). However, current studies only achieve identifying a limited number of impairments within a narro&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.13575v1-abstract-full').style.display = 'inline'; document.getElementById('2308.13575v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.13575v1-abstract-full" style="display: none;"> To comprehensively assess optical fiber communication system conditions, it is essential to implement joint estimation of the following four critical impairments: nonlinear signal-to-noise ratio (SNRNL), optical signal-to-noise ratio (OSNR), chromatic dispersion (CD) and differential group delay (DGD). However, current studies only achieve identifying a limited number of impairments within a narrow range, due to limitations in network capabilities and lack of unified representation of impairments. To address these challenges, we adopt time-frequency signal processing based on fractional Fourier transform (FrFT) to achieve the unified representation of impairments, while employing a Transformer based neural networks (NN) to break through network performance limitations. 
To verify the effectiveness of the proposed estimation method, the numerical simulation is carried out on a 5-channel polarization-division-multiplexed quadrature phase shift keying (PDM-QPSK) long haul optical transmission system with the symbol rate of 50 GBaud per channel, the mean absolute error (MAE) for SNRNL, OSNR, CD, and DGD estimation is 0.091 dB, 0.058 dB, 117 ps/nm, and 0.38 ps, and the monitoring window ranges from 0~20 dB, 10~30 dB, 0~51000 ps/nm, and 0~100 ps, respectively. Our proposed method achieves accurate estimation of linear and nonlinear impairments over a broad range, representing a significant advancement in the field of optical performance monitoring (OPM). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.13575v1-abstract-full').style.display = 'none'; document.getElementById('2308.13575v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages, 10 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.04455">arXiv:2307.04455</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2307.04455">pdf</a>, <a href="https://arxiv.org/format/2307.04455">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> SAM-IQA: Can Segment Anything Boost Image Quality Assessment? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Li%2C+X">Xinpeng Li</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Ting Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Fan%2C+H">Haoqiang Fan</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+S">Shuaicheng Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.04455v1-abstract-short" style="display: inline;"> Image Quality Assessment (IQA) is a challenging task that requires training on massive datasets to achieve accurate predictions. However, due to the lack of IQA data, deep learning-based IQA methods typically rely on pre-trained networks trained on massive datasets as feature extractors to enhance their generalization ability, such as the ResNet network trained on ImageNet. 
In this paper, we utili&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.04455v1-abstract-full').style.display = 'inline'; document.getElementById('2307.04455v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.04455v1-abstract-full" style="display: none;"> Image Quality Assessment (IQA) is a challenging task that requires training on massive datasets to achieve accurate predictions. However, due to the lack of IQA data, deep learning-based IQA methods typically rely on pre-trained networks trained on massive datasets as feature extractors to enhance their generalization ability, such as the ResNet network trained on ImageNet. In this paper, we utilize the encoder of Segment Anything, a recently proposed segmentation model trained on a massive dataset, for high-level semantic feature extraction. Most IQA methods are limited to extracting spatial-domain features, while frequency-domain features have been shown to better represent noise and blur. Therefore, we leverage both spatial-domain and frequency-domain features by applying Fourier and standard convolutions on the extracted features, respectively. Extensive experiments are conducted to demonstrate the effectiveness of all the proposed components, and results show that our approach outperforms the state-of-the-art (SOTA) in four representative datasets, both qualitatively and quantitatively. Our experiments confirm the powerful feature extraction capabilities of Segment Anything and highlight the value of combining spatial-domain and frequency-domain features in IQA tasks. 
Code: https://github.com/Hedlen/SAM-IQA <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.04455v1-abstract-full').style.display = 'none'; document.getElementById('2307.04455v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.08337">arXiv:2306.08337</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2306.08337">pdf</a>, <a href="https://arxiv.org/format/2306.08337">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Carbon emissions and sustainability of launching 5G mobile networks in China </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Li%2C+T">Tong Li</a>, <a href="/search/eess?searchtype=author&amp;query=Yu%2C+L">Li Yu</a>, <a href="/search/eess?searchtype=author&amp;query=Ma%2C+Y">Yibo Ma</a>, <a href="/search/eess?searchtype=author&amp;query=Duan%2C+T">Tong Duan</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+W">Wenzhen Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Zhou%2C+Y">Yan Zhou</a>, <a href="/search/eess?searchtype=author&amp;query=Jin%2C+D">Depeng Jin</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+Y">Yong Li</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Tao Jiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.08337v1-abstract-short" style="display: inline;"> Since 2021, China has deployed more than 2.1 million 5G base stations to increase the network capacity and provide ubiquitous digital connectivity for mobile terminals. However, the launch of 5G networks also exacerbates the misalignment between cellular traffic and energy consumption, which reduces carbon efficiency - the amount of network traffic that can be delivered for each unit of carbon emi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.08337v1-abstract-full').style.display = 'inline'; document.getElementById('2306.08337v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.08337v1-abstract-full" style="display: none;"> Since 2021, China has deployed more than 2.1 million 5G base stations to increase the network capacity and provide ubiquitous digital connectivity for mobile terminals. However, the launch of 5G networks also exacerbates the misalignment between cellular traffic and energy consumption, which reduces carbon efficiency - the amount of network traffic that can be delivered for each unit of carbon emission. In this study, we develop a large-scale data-driven framework to estimate the carbon emissions induced by mobile networks. We show that the decline in carbon efficiency leads to a carbon efficiency trap, estimated to cause additional carbon emissions of 23.82 +- 1.07 megatons in China. To mitigate the misalignment and improve energy efficiency, we propose DeepEnergy, an energy-saving method leveraging collaborative deep reinforcement learning and graph neural networks. 
DeepEnergy models complex collaboration among cells, making it possible to effectively coordinate the working state of tens of thousands of cells, which could help over 71% of Chinese provinces avoid carbon efficiency traps. In addition, applying DeepEnergy is estimated to reduce 20.90 +- 0.98 megatons of carbon emissions at the national level in 2023. We further assess the effects of adopting renewable energy and discover that the mobile network could accomplish more than 50% of its net-zero goal by integrating DeepEnergy and solar energy systems. Our study provides insight into carbon emission mitigation in 5G network infrastructure launching in China and worldwide, paving the way towards achieving sustainable development goals and future net-zero mobile networks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.08337v1-abstract-full').style.display = 'none'; document.getElementById('2306.08337v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.14022">arXiv:2305.14022</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2305.14022">pdf</a>, <a href="https://arxiv.org/format/2305.14022">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Realistic Noise Synthesis with Diffusion Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Wu%2C+Q">Qi Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Han%2C+M">Mingyan Han</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Ting Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+C">Chengzhi Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Luo%2C+J">Jinting Luo</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+M">Man Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Fan%2C+H">Haoqiang Fan</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+S">Shuaicheng Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.14022v4-abstract-short" style="display: inline;"> Deep denoising models require extensive real-world training data, which is challenging to acquire. Current noise synthesis techniques struggle to accurately model complex noise distributions. We propose a novel Realistic Noise Synthesis Diffusor (RNSD) method using diffusion models to address these challenges. 
By encoding camera settings into a time-aware camera-conditioned affine modulation (TCCA&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.14022v4-abstract-full').style.display = 'inline'; document.getElementById('2305.14022v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.14022v4-abstract-full" style="display: none;"> Deep denoising models require extensive real-world training data, which is challenging to acquire. Current noise synthesis techniques struggle to accurately model complex noise distributions. We propose a novel Realistic Noise Synthesis Diffusor (RNSD) method using diffusion models to address these challenges. By encoding camera settings into a time-aware camera-conditioned affine modulation (TCCAM), RNSD generates more realistic noise distributions under various camera conditions. Additionally, RNSD integrates a multi-scale content-aware module (MCAM), enabling the generation of structured noise with spatial correlations across multiple frequencies. We also introduce Deep Image Prior Sampling (DIPS), a learnable sampling sequence based on depth image prior, which significantly accelerates the sampling process while maintaining the high quality of synthesized noise. Extensive experiments demonstrate that our RNSD method significantly outperforms existing techniques in synthesizing realistic noise under multiple metrics and improving image denoising performance. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.14022v4-abstract-full').style.display = 'none'; document.getElementById('2305.14022v4-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by AAAI25</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.07130">arXiv:2305.07130</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2305.07130">pdf</a>, <a href="https://arxiv.org/format/2305.07130">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Active Sensing for Two-Sided Beam Alignment and Reflection Design Using Ping-Pong Pilots </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Tao Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Sohrabi%2C+F">Foad Sohrabi</a>, <a href="/search/eess?searchtype=author&amp;query=Yu%2C+W">Wei Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.07130v1-abstract-short" style="display: inline;"> Beam alignment is an important task for millimeter-wave (mmWave) 
communication, because constructing aligned narrow beams both at the transmitter (Tx) and the receiver (Rx) is crucial in terms of compensating the significant path loss in very high-frequency bands. However, beam alignment is also a highly nontrivial task because large antenna arrays typically have a limited number of radio-frequenc&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.07130v1-abstract-full').style.display = 'inline'; document.getElementById('2305.07130v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.07130v1-abstract-full" style="display: none;"> Beam alignment is an important task for millimeter-wave (mmWave) communication, because constructing aligned narrow beams both at the transmitter (Tx) and the receiver (Rx) is crucial in terms of compensating the significant path loss in very high-frequency bands. However, beam alignment is also a highly nontrivial task because large antenna arrays typically have a limited number of radio-frequency chains, allowing only low-dimensional measurements of the high-dimensional channel. This paper considers a two-sided beam alignment problem based on an alternating ping-pong pilot scheme between Tx and Rx over multiple rounds without explicit feedback. We propose a deep active sensing framework in which two long short-term memory (LSTM) based neural networks are employed to learn the adaptive sensing strategies (i.e., measurement vectors) and to produce the final aligned beamformers at both sides. In the proposed ping-pong protocol, the Tx and the Rx alternately send pilots so that both sides can leverage local observations to sequentially design their respective sensing and data transmission beamformers. 
The proposed strategy can be extended to scenarios with a reconfigurable intelligent surface (RIS) for designing, in addition, the reflection coefficients at the RIS for both sensing and communications. Numerical experiments demonstrate significant and interpretable performance improvement. The proposed strategy works well even for the challenging multipath channel environments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.07130v1-abstract-full').style.display = 'none'; document.getElementById('2305.07130v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper is accepted in IEEE Journal on Selected Areas in Information Theory</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.05899">arXiv:2305.05899</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2305.05899">pdf</a>, <a href="https://arxiv.org/format/2305.05899">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Mobile Image Restoration via Prior Quantization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Chen%2C+S">Shiqi Chen</a>, <a 
href="/search/eess?searchtype=author&amp;query=Zhou%2C+J">Jinwen Zhou</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+M">Menghao Li</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+Y">Yueting Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Tingting Jiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.05899v1-abstract-short" style="display: inline;"> In digital images, the performance of optical aberration is a multivariate degradation, where the spectral of the scene, the lens imperfections, and the field of view together contribute to the results. Besides eliminating it at the hardware level, the post-processing system, which utilizes various prior information, is significant for correction. However, due to the content differences among prio&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.05899v1-abstract-full').style.display = 'inline'; document.getElementById('2305.05899v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.05899v1-abstract-full" style="display: none;"> In digital images, the performance of optical aberration is a multivariate degradation, where the spectral of the scene, the lens imperfections, and the field of view together contribute to the results. Besides eliminating it at the hardware level, the post-processing system, which utilizes various prior information, is significant for correction. However, due to the content differences among priors, the pipeline that aligns these factors shows limited efficiency and unoptimized restoration. Here, we propose a prior quantization model to correct the optical aberrations in image processing systems. 
To integrate these messages, we encode various priors into a latent space and quantify them by the learnable codebooks. After quantization, the prior codes are fused with the image restoration branch to realize targeted optical aberration correction. Comprehensive experiments demonstrate the flexibility of the proposed method and validate its potential to accomplish targeted restoration for a specific camera. Furthermore, our model promises to analyze the correlation between the various priors and the optical aberration of devices, which is helpful for joint soft-hardware design. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.05899v1-abstract-full').style.display = 'none'; document.getElementById('2305.05899v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to Elsevier PRL. 
5 pages, 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2304.07018">arXiv:2304.07018</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2304.07018">pdf</a>, <a href="https://arxiv.org/format/2304.07018">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> DIPNet: Efficiency Distillation and Iterative Pruning for Image Super-Resolution </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Yu%2C+L">Lei Yu</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+X">Xinpeng Li</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+Y">Youwei Li</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Ting Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+Q">Qi Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Fan%2C+H">Haoqiang Fan</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+S">Shuaicheng Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2304.07018v1-abstract-short" style="display: inline;"> Efficient deep learning-based approaches have achieved remarkable performance in single image super-resolution. However, recent studies on efficient super-resolution have mainly focused on reducing the number of parameters and floating-point operations through various network designs. 
Although these methods can decrease the number of parameters and floating-point operations, they may not necessari&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.07018v1-abstract-full').style.display = 'inline'; document.getElementById('2304.07018v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2304.07018v1-abstract-full" style="display: none;"> Efficient deep learning-based approaches have achieved remarkable performance in single image super-resolution. However, recent studies on efficient super-resolution have mainly focused on reducing the number of parameters and floating-point operations through various network designs. Although these methods can decrease the number of parameters and floating-point operations, they may not necessarily reduce actual running time. To address this issue, we propose a novel multi-stage lightweight network boosting method, which can enable lightweight networks to achieve outstanding performance. Specifically, we leverage enhanced high-resolution output as additional supervision to improve the learning ability of lightweight student networks. Upon convergence of the student network, we further simplify our network structure to a more lightweight level using reparameterization techniques and iterative network pruning. Meanwhile, we adopt an effective lightweight network training strategy that combines multi-anchor distillation and progressive learning, enabling the lightweight network to achieve outstanding performance. Ultimately, our proposed method achieves the fastest inference time among all participants in the NTIRE 2023 efficient super-resolution challenge while maintaining competitive super-resolution performance. Additionally, extensive experiments are conducted to demonstrate the effectiveness of the proposed components. 
The results show that our approach achieves comparable performance in representative dataset DIV2K, both qualitatively and quantitatively, with faster inference and fewer number of network parameters. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.07018v1-abstract-full').style.display = 'none'; document.getElementById('2304.07018v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.17959">arXiv:2303.17959</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2303.17959">pdf</a>, <a href="https://arxiv.org/format/2303.17959">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Diffusion Action Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Liu%2C+D">Daochang Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+Q">Qiyue Li</a>, <a href="/search/eess?searchtype=author&amp;query=Dinh%2C+A">AnhDung Dinh</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Tingting Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Shah%2C+M">Mubarak Shah</a>, <a href="/search/eess?searchtype=author&amp;query=Xu%2C+C">Chang Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short 
has-text-grey-dark mathjax" id="2303.17959v2-abstract-short" style="display: inline;"> Temporal action segmentation is crucial for understanding long-form videos. Previous works on this task commonly adopt an iterative refinement paradigm by using multi-stage models. We propose a novel framework via denoising diffusion models, which nonetheless shares the same inherent spirit of such iterative refinement. In this framework, action predictions are iteratively generated from random no&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.17959v2-abstract-full').style.display = 'inline'; document.getElementById('2303.17959v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.17959v2-abstract-full" style="display: none;"> Temporal action segmentation is crucial for understanding long-form videos. Previous works on this task commonly adopt an iterative refinement paradigm by using multi-stage models. We propose a novel framework via denoising diffusion models, which nonetheless shares the same inherent spirit of such iterative refinement. In this framework, action predictions are iteratively generated from random noise with input video features as conditions. To enhance the modeling of three striking characteristics of human actions, including the position prior, the boundary ambiguity, and the relational dependency, we devise a unified masking strategy for the conditioning inputs in our framework. Extensive experiments on three benchmark datasets, i.e., GTEA, 50Salads, and Breakfast, are performed and the proposed method achieves superior or comparable results to state-of-the-art methods, showing the effectiveness of a generative approach for action segmentation. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.17959v2-abstract-full').style.display = 'none'; document.getElementById('2303.17959v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICCV 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.07201">arXiv:2211.07201</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2211.07201">pdf</a>, <a href="https://arxiv.org/format/2211.07201">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Towards A Unified Conformer Structure: from ASR to ASV Task </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Liao%2C+D">Dexin Liao</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Tao Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+F">Feng Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+L">Lin Li</a>, <a href="/search/eess?searchtype=author&amp;query=Hong%2C+Q">Qingyang Hong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" 
id="2211.07201v2-abstract-short" style="display: inline;"> Transformer has achieved extraordinary performance in Natural Language Processing and Computer Vision tasks thanks to its powerful self-attention mechanism, and its variant Conformer has become a state-of-the-art architecture in the field of Automatic Speech Recognition (ASR). However, the main-stream architecture for Automatic Speaker Verification (ASV) is convolutional Neural Networks, and there&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.07201v2-abstract-full').style.display = 'inline'; document.getElementById('2211.07201v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.07201v2-abstract-full" style="display: none;"> Transformer has achieved extraordinary performance in Natural Language Processing and Computer Vision tasks thanks to its powerful self-attention mechanism, and its variant Conformer has become a state-of-the-art architecture in the field of Automatic Speech Recognition (ASR). However, the main-stream architecture for Automatic Speaker Verification (ASV) is convolutional Neural Networks, and there is still much room for research on the Conformer based ASV. In this paper, firstly, we modify the Conformer architecture from ASR to ASV with very minor changes. Length-Scaled Attention (LSA) method and Sharpness-Aware Minimizationis (SAM) are adopted to improve model generalization. Experiments conducted on VoxCeleb and CN-Celeb show that our Conformer based ASV achieves competitive performance compared with the popular ECAPA-TDNN. Secondly, inspired by the transfer learning strategy, ASV Conformer is natural to be initialized from the pretrained ASR model. 
Via parameter transferring, self-attention mechanism could better focus on the relationship between sequence features, brings about 11% relative improvement in EER on test set of VoxCeleb and CN-Celeb, which reveals the potential of Conformer to unify ASV and ASR task. Finally, we provide a runtime in ASV-Subtools to evaluate its inference speed in production scenario. Our code is released at https://github.com/Snowdar/asv-subtools/tree/master/doc/papers/conformer.md. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.07201v2-abstract-full').style.display = 'none'; document.getElementById('2211.07201v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.02596">arXiv:2210.02596</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2210.02596">pdf</a>, <a href="https://arxiv.org/format/2210.02596">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Role of Deep Learning in Wireless Communications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Yu%2C+W">Wei Yu</a>, <a href="/search/eess?searchtype=author&amp;query=Sohrabi%2C+F">Foad Sohrabi</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Tao Jiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.02596v1-abstract-short" style="display: inline;"> Traditional communication system design has always been based on the paradigm of first establishing a mathematical model of the communication channel, then designing and optimizing the system according to the model. The advent of modern machine learning techniques, specifically deep neural networks, has opened up opportunities for data-driven system design and optimization. 
This article draws exam&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.02596v1-abstract-full').style.display = 'inline'; document.getElementById('2210.02596v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.02596v1-abstract-full" style="display: none;"> Traditional communication system design has always been based on the paradigm of first establishing a mathematical model of the communication channel, then designing and optimizing the system according to the model. The advent of modern machine learning techniques, specifically deep neural networks, has opened up opportunities for data-driven system design and optimization. This article draws examples from the optimization of reconfigurable intelligent surface, distributed channel estimation and feedback for multiuser beamforming, and active sensing for millimeter wave (mmWave) initial alignment to illustrate that a data-driven design that bypasses explicit channel modelling can often discover excellent solutions to communication system design and optimization problems that are otherwise computationally difficult to solve. We show that by performing an end-to-end training of a deep neural network using a large number of channel samples, a machine learning based approach can potentially provide significant system-level improvements as compared to the traditional model-based approach for solving optimization problems. The key to the successful applications of machine learning techniques is in choosing the appropriate neural network architecture to match the underlying problem structure. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.02596v1-abstract-full').style.display = 'none'; document.getElementById('2210.02596v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 12 figures, To appear in IEEE BITS the Information Theory Magazine</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.12633">arXiv:2205.12633</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2205.12633">pdf</a>, <a href="https://arxiv.org/format/2205.12633">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> NTIRE 2022 Challenge on High Dynamic Range Imaging: Methods and Results </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=P%C3%A9rez-Pellitero%2C+E">Eduardo Pérez-Pellitero</a>, <a href="/search/eess?searchtype=author&amp;query=Catley-Chandar%2C+S">Sibi Catley-Chandar</a>, <a href="/search/eess?searchtype=author&amp;query=Shaw%2C+R">Richard Shaw</a>, <a href="/search/eess?searchtype=author&amp;query=Leonardis%2C+A">Aleš Leonardis</a>, <a href="/search/eess?searchtype=author&amp;query=Timofte%2C+R">Radu Timofte</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+Z">Zexin Zhang</a>, <a
href="/search/eess?searchtype=author&amp;query=Liu%2C+C">Cen Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Peng%2C+Y">Yunbo Peng</a>, <a href="/search/eess?searchtype=author&amp;query=Lin%2C+Y">Yue Lin</a>, <a href="/search/eess?searchtype=author&amp;query=Yu%2C+G">Gaocheng Yu</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+J">Jin Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Ma%2C+Z">Zhe Ma</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+H">Hongbin Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+X">Xiangyu Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+X">Xintao Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Wu%2C+H">Haiwei Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+L">Lin Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Dong%2C+C">Chao Dong</a>, <a href="/search/eess?searchtype=author&amp;query=Zhou%2C+J">Jiantao Zhou</a>, <a href="/search/eess?searchtype=author&amp;query=Yan%2C+Q">Qingsen Yan</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+S">Song Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Chen%2C+W">Weiye Chen</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+Y">Yuhang Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+Z">Zhen Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+Y">Yanning Zhang</a> , et al. (68 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.12633v1-abstract-short" style="display: inline;"> This paper reviews the challenge on constrained high dynamic range (HDR) imaging that was part of the New Trends in Image Restoration and Enhancement (NTIRE) workshop, held in conjunction with CVPR 2022. 
This manuscript focuses on the competition set-up, datasets, the proposed methods and their results. The challenge aims at estimating an HDR image from multiple respective low dynamic range (LDR)&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.12633v1-abstract-full').style.display = 'inline'; document.getElementById('2205.12633v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.12633v1-abstract-full" style="display: none;"> This paper reviews the challenge on constrained high dynamic range (HDR) imaging that was part of the New Trends in Image Restoration and Enhancement (NTIRE) workshop, held in conjunction with CVPR 2022. This manuscript focuses on the competition set-up, datasets, the proposed methods and their results. The challenge aims at estimating an HDR image from multiple respective low dynamic range (LDR) observations, which might suffer from under- or over-exposed regions and different sources of noise. The challenge is composed of two tracks with an emphasis on fidelity and complexity constraints: In Track 1, participants are asked to optimize objective fidelity scores while imposing a low-complexity constraint (i.e. solutions can not exceed a given number of operations). In Track 2, participants are asked to minimize the complexity of their solutions while imposing a constraint on fidelity scores (i.e. solutions are required to obtain a higher fidelity score than the prescribed baseline). Both tracks use the same data and metrics: Fidelity is measured by means of PSNR with respect to a ground-truth HDR image (computed both directly and with a canonical tonemapping operation), while complexity metrics include the number of Multiply-Accumulate (MAC) operations and runtime (in seconds). 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.12633v1-abstract-full').style.display = 'none'; document.getElementById('2205.12633v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">CVPR Workshops 2022. 15 pages, 21 figures, 2 tables</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops, 2022 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.06396">arXiv:2205.06396</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2205.06396">pdf</a>, <a href="https://arxiv.org/format/2205.06396">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/JSTSP.2022.3178213">10.1109/JSTSP.2022.3178213 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Learning Based User Scheduling in Reconfigurable Intelligent Surface Assisted Multiuser Downlink </p> <p class="authors"> <span class="search-hit">Authors:</span> <a 
href="/search/eess?searchtype=author&amp;query=Zhang%2C+Z">Zhongze Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Tao Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Yu%2C+W">Wei Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.06396v1-abstract-short" style="display: inline;"> Reconfigurable intelligent surface (RIS) is capable of intelligently manipulating the phases of the incident electromagnetic wave to improve the wireless propagation environment between the base-station (BS) and the users. This paper addresses the joint user scheduling, RIS configuration, and BS beamforming problem in an RIS-assisted downlink network with limited pilot overhead. We show that graph&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.06396v1-abstract-full').style.display = 'inline'; document.getElementById('2205.06396v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.06396v1-abstract-full" style="display: none;"> Reconfigurable intelligent surface (RIS) is capable of intelligently manipulating the phases of the incident electromagnetic wave to improve the wireless propagation environment between the base-station (BS) and the users. This paper addresses the joint user scheduling, RIS configuration, and BS beamforming problem in an RIS-assisted downlink network with limited pilot overhead. We show that graph neural networks (GNN) with permutation invariant and equivariant properties can be used to appropriately schedule users and to design RIS configurations to achieve high overall throughput while accounting for fairness among the users. 
As compared to the conventional methodology of first estimating the channels then optimizing the user schedule, RIS configuration and the beamformers, this paper shows that an optimized user schedule can be obtained directly from a very short set of pilots using a GNN, then the RIS configuration can be optimized using a second GNN, and finally the BS beamformers can be designed based on the overall effective channel. Numerical results show that the proposed approach can utilize the received pilots more efficiently than the conventional channel estimation based approach, and can generalize to systems with an arbitrary number of users. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.06396v1-abstract-full').style.display = 'none'; document.getElementById('2205.06396v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted in IEEE Journal of Selected Topics in Signal Processing</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2202.09020">arXiv:2202.09020</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2202.09020">pdf</a>, <a href="https://arxiv.org/format/2202.09020">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> A Comprehensive Survey with Quantitative Comparison of Image Analysis Methods for Microorganism Biovolume Measurements </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+J">Jiawei Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+C">Chen Li</a>, <a href="/search/eess?searchtype=author&amp;query=Rahaman%2C+M+M">Md Mamunur Rahaman</a>, <a href="/search/eess?searchtype=author&amp;query=Yao%2C+Y">Yudong Yao</a>, <a href="/search/eess?searchtype=author&amp;query=Ma%2C+P">Pingli Ma</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+J">Jinghua Zhang</a>, <a href="/search/eess?searchtype=author&amp;query=Zhao%2C+X">Xin Zhao</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Tao Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Grzegorzek%2C+M">Marcin Grzegorzek</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short 
has-text-grey-dark mathjax" id="2202.09020v2-abstract-short" style="display: inline;"> With the acceleration of urbanization and living standards, microorganisms play increasingly important roles in industrial production, bio-technique, and food safety testing. Microorganism biovolume measurements are one of the essential parts of microbial analysis. However, traditional manual measurement methods are time-consuming and challenging to measure the characteristics precisely. With the&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.09020v2-abstract-full').style.display = 'inline'; document.getElementById('2202.09020v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2202.09020v2-abstract-full" style="display: none;"> With the acceleration of urbanization and living standards, microorganisms play increasingly important roles in industrial production, bio-technique, and food safety testing. Microorganism biovolume measurements are one of the essential parts of microbial analysis. However, traditional manual measurement methods are time-consuming and challenging to measure the characteristics precisely. With the development of digital image processing techniques, the characteristics of the microbial population can be detected and quantified. The changing trend can be adjusted in time and provided a basis for the improvement. The applications of the microorganism biovolume measurement method have developed since the 1980s. More than 62 articles are reviewed in this study, and the articles are grouped by digital image segmentation methods with periods. This study has high research significance and application value, which can be referred to microbial researchers to have a comprehensive understanding of microorganism biovolume measurements using digital image analysis methods and potential applications. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.09020v2-abstract-full').style.display = 'none'; document.getElementById('2202.09020v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2202.07820">arXiv:2202.07820</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2202.07820">pdf</a>, <a href="https://arxiv.org/format/2202.07820">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> A Survey of Semen Quality Evaluation in Microscopic Videos Using Computer Assisted Sperm Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zhao%2C+W">Wenwei Zhao</a>, <a href="/search/eess?searchtype=author&amp;query=Ma%2C+P">Pingli Ma</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+C">Chen Li</a>, <a href="/search/eess?searchtype=author&amp;query=Bu%2C+X">Xiaoning Bu</a>, <a href="/search/eess?searchtype=author&amp;query=Zou%2C+S">Shuojia Zou</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Tao Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Grzegorzek%2C+M">Marcin Grzegorzek</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: 
<span class="abstract-short has-text-grey-dark mathjax" id="2202.07820v2-abstract-short" style="display: inline;"> The Computer Assisted Sperm Analysis (CASA) plays a crucial role in male reproductive health diagnosis and Infertility treatment. With the development of the computer industry in recent years, a great of accurate algorithms are proposed. With the assistance of those novel algorithms, it is possible for CASA to achieve a faster and higher quality result. Since image processing is the technical basi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.07820v2-abstract-full').style.display = 'inline'; document.getElementById('2202.07820v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2202.07820v2-abstract-full" style="display: none;"> The Computer Assisted Sperm Analysis (CASA) plays a crucial role in male reproductive health diagnosis and Infertility treatment. With the development of the computer industry in recent years, a great of accurate algorithms are proposed. With the assistance of those novel algorithms, it is possible for CASA to achieve a faster and higher quality result. Since image processing is the technical basis of CASA, including pre-processing,feature extraction, target detection and tracking, these methods are important technical steps in dealing with CASA. The various works related to Computer Assisted Sperm Analysis methods in the last 30 years (since 1988) are comprehensively introduced and analysed in this survey. To facilitate understanding, the methods involved are analysed in the sequence of general steps in sperm analysis. In other words, the methods related to sperm detection (localization) are first analysed, and then the methods of sperm tracking are analysed. Beside this, we analyse and prospect the present situation and future of CASA. 
According to our work, the feasible for applying in sperm microscopic video of methods mentioned in this review is explained. Moreover, existing challenges of object detection and tracking in microscope video are potential to be solved inspired by this survey. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.07820v2-abstract-full').style.display = 'none'; document.getElementById('2202.07820v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2202.06948">arXiv:2202.06948</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2202.06948">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.3389/fncom.2023.1232925">10.3389/fncom.2023.1232925 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Towards Best Practice of Interpreting Deep Learning Models for 
EEG-based Brain Computer Interfaces </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Cui%2C+J">Jian Cui</a>, <a href="/search/eess?searchtype=author&amp;query=Yuan%2C+L">Liqiang Yuan</a>, <a href="/search/eess?searchtype=author&amp;query=Wang%2C+Z">Zhaoxiang Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+R">Ruilin Li</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Tianzi Jiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2202.06948v3-abstract-short" style="display: inline;"> As deep learning has achieved state-of-the-art performance for many tasks of EEG-based BCI, many efforts have been made in recent years trying to understand what have been learned by the models. This is commonly done by generating a heatmap indicating to which extent each pixel of the input contributes to the final classification for a trained model. Despite the wide use, it is not yet understood&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.06948v3-abstract-full').style.display = 'inline'; document.getElementById('2202.06948v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2202.06948v3-abstract-full" style="display: none;"> As deep learning has achieved state-of-the-art performance for many tasks of EEG-based BCI, many efforts have been made in recent years trying to understand what have been learned by the models. This is commonly done by generating a heatmap indicating to which extent each pixel of the input contributes to the final classification for a trained model. Despite the wide use, it is not yet understood to which extent the obtained interpretation results can be trusted and how accurate they can reflect the model decisions. 
In order to fill this research gap, we conduct a study to evaluate different deep interpretation techniques quantitatively on EEG datasets. The results reveal the importance of selecting a proper interpretation technique as the initial step. In addition, we also find that the quality of the interpretation results is inconsistent for individual samples even when a method with an overall good performance is used. Many factors, including model structure and dataset types, could potentially affect the quality of the interpretation results. Based on the observations, we propose a set of procedures that allow the interpretation results to be presented in an understandable and trusted way. We illustrate the usefulness of our method for EEG-based BCI with instances selected from different scenarios. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.06948v3-abstract-full').style.display = 'none'; document.getElementById('2202.06948v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2022. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2202.06465">arXiv:2202.06465</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2202.06465">pdf</a>, <a href="https://arxiv.org/format/2202.06465">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> A State-of-the-art Survey of U-Net in Microscopic Image Analysis: from Simple Usage to Structure Mortification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Wu%2C+J">Jian Wu</a>, <a href="/search/eess?searchtype=author&amp;query=Liu%2C+W">Wanli Liu</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+C">Chen Li</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Tao Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Shariful%2C+I+M">Islam Mohammad Shariful</a>, <a href="/search/eess?searchtype=author&amp;query=Sun%2C+H">Hongzan Sun</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+X">Xiaoqi Li</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+X">Xintong Li</a>, <a href="/search/eess?searchtype=author&amp;query=Huang%2C+X">Xinyu Huang</a>, <a href="/search/eess?searchtype=author&amp;query=Grzegorzek%2C+M">Marcin Grzegorzek</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2202.06465v2-abstract-short" style="display: inline;"> Image analysis technology is used to solve the inadvertences of artificial traditional methods in disease, wastewater treatment, environmental change monitoring analysis and 
convolutional neural networks (CNN) play an important role in microscopic image analysis. An important step in detection, tracking, monitoring, feature extraction, modeling and analysis is image segmentation, in which U-Net ha&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.06465v2-abstract-full').style.display = 'inline'; document.getElementById('2202.06465v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2202.06465v2-abstract-full" style="display: none;"> Image analysis technology is used to solve the inadvertences of artificial traditional methods in disease, wastewater treatment, environmental change monitoring analysis and convolutional neural networks (CNN) play an important role in microscopic image analysis. An important step in detection, tracking, monitoring, feature extraction, modeling and analysis is image segmentation, in which U-Net has increasingly applied in microscopic image segmentation. This paper comprehensively reviews the development history of U-Net, and analyzes various research results of various segmentation methods since the emergence of U-Net and conducts a comprehensive review of related papers. First, this paper has summarized the improved methods of U-Net and then listed the existing significance of image segmentation techniques and their improvements that has introduced over the years. Finally, focusing on the different improvement strategies of U-Net in different papers, the related work of each application target is reviewed according to detailed technical categories to facilitate future research. Researchers can clearly see the dynamics of transmission of technological development and keep up with future trends in this interdisciplinary field. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.06465v2-abstract-full').style.display = 'none'; document.getElementById('2202.06465v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 April, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2112.13261">arXiv:2112.13261</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2112.13261">pdf</a>, <a href="https://arxiv.org/format/2112.13261">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Interference Nulling Using Reconfigurable Intelligent Surface </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Tao Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Yu%2C+W">Wei Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2112.13261v2-abstract-short" style="display: inline;"> This paper investigates the interference nulling capability of reconfigurable intelligent surface (RIS) in a multiuser environment where multiple single-antenna transceivers communicate simultaneously in a shared spectrum. 
From a theoretical perspective, we show that when the channels between the RIS and the transceivers have line-of-sight and the direct paths are blocked, it is possible to adjust&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.13261v2-abstract-full').style.display = 'inline'; document.getElementById('2112.13261v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2112.13261v2-abstract-full" style="display: none;"> This paper investigates the interference nulling capability of reconfigurable intelligent surface (RIS) in a multiuser environment where multiple single-antenna transceivers communicate simultaneously in a shared spectrum. From a theoretical perspective, we show that when the channels between the RIS and the transceivers have line-of-sight and the direct paths are blocked, it is possible to adjust the phases of the RIS elements to null out all the interference completely and to achieve the maximum $K$ degrees-of-freedom (DoF) in the overall $K$-user interference channel, provided that the number of RIS elements exceeds some finite value that depends on $K$. Algorithmically, for any fixed channel realization we formulate the interference nulling problem as a feasibility problem, and propose an alternating projection algorithm to efficiently solve the resulting nonconvex problem with local convergence guarantee. Numerical results show that the proposed alternating projection algorithm can null all the interference if the number of RIS elements is only slightly larger than a threshold of $2K(K-1)$. For the practical sum-rate maximization objective, this paper proposes to use the zero-forcing solution obtained from alternating projection as an initial point for subsequent Riemannian conjugate gradient optimization and shows that it has a significant performance advantage over random initializations. 
For the objective of maximizing the minimum rate, this paper proposes a subgradient projection method which is capable of achieving excellent performance at low complexity. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2112.13261v2-abstract-full').style.display = 'none'; document.getElementById('2112.13261v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 January, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper is accepted in IEEE Journal on Selected Areas in Communications</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2110.09121">arXiv:2110.09121</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2110.09121">pdf</a>, <a href="https://arxiv.org/ps/2110.09121">ps</a>, <a href="https://arxiv.org/format/2110.09121">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> KaraTuner: Towards end to end natural pitch correction for singing voice in karaoke </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Zhuang%2C+X">Xiaobin Zhuang</a>, <a href="/search/eess?searchtype=author&amp;query=Yu%2C+H">Huiran Yu</a>, <a href="/search/eess?searchtype=author&amp;query=Zhao%2C+W">Weifeng Zhao</a>, 
<a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Tao Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Hu%2C+P">Peng Hu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2110.09121v2-abstract-short" style="display: inline;"> An automatic pitch correction system typically includes several stages, such as pitch extraction, deviation estimation, pitch shift processing, and cross-fade smoothing. However, designing these components with strategies often requires domain expertise and they are likely to fail on corner cases. In this paper, we present KaraTuner, an end-to-end neural architecture that predicts pitch curve and&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.09121v2-abstract-full').style.display = 'inline'; document.getElementById('2110.09121v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2110.09121v2-abstract-full" style="display: none;"> An automatic pitch correction system typically includes several stages, such as pitch extraction, deviation estimation, pitch shift processing, and cross-fade smoothing. However, designing these components with strategies often requires domain expertise and they are likely to fail on corner cases. In this paper, we present KaraTuner, an end-to-end neural architecture that predicts pitch curve and resynthesizes the singing voice directly from the tuned pitch and vocal spectrum extracted from the original recordings. Several vital technical points have been introduced in KaraTuner to ensure pitch accuracy, pitch naturalness, timbre consistency, and sound quality. A feed-forward Transformer is employed in the pitch predictor to capture longterm dependencies in the vocal spectrum and musical note. 
We also develop a pitch-controllable vocoder based on a novel source-filter block and the Fre-GAN architecture. KaraTuner obtains a higher preference than the rule-based pitch correction approach through A/B tests, and perceptual experiments show that the proposed vocoder achieves significant advantages in timbre consistency and sound quality compared with the parametric WORLD vocoder, phase vocoder and CLPC vocoder. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.09121v2-abstract-full').style.display = 'none'; document.getElementById('2110.09121v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To be published in Proc. 
Interspeech 2022, Incheon, South Korea</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2107.11617">arXiv:2107.11617</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2107.11617">pdf</a>, <a href="https://arxiv.org/format/2107.11617">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> LAConv: Local Adaptive Convolution for Image Fusion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Jin%2C+Z">Zi-Rong Jin</a>, <a href="/search/eess?searchtype=author&amp;query=Deng%2C+L">Liang-Jian Deng</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Tai-Xiang Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Zhang%2C+T">Tian-Jing Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2107.11617v1-abstract-short" style="display: inline;"> The convolution operation is a powerful tool for feature extraction and plays a prominent role in the field of computer vision. However, when targeting the pixel-wise tasks like image fusion, it would not fully perceive the particularity of each pixel in the image if the uniform convolution kernel is used on different patches. 
In this paper, we propose a local adaptive convolution (LAConv), which&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.11617v1-abstract-full').style.display = 'inline'; document.getElementById('2107.11617v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2107.11617v1-abstract-full" style="display: none;"> The convolution operation is a powerful tool for feature extraction and plays a prominent role in the field of computer vision. However, when targeting the pixel-wise tasks like image fusion, it would not fully perceive the particularity of each pixel in the image if the uniform convolution kernel is used on different patches. In this paper, we propose a local adaptive convolution (LAConv), which is dynamically adjusted to different spatial locations. LAConv enables the network to pay attention to every specific local area in the learning process. Besides, the dynamic bias (DYB) is introduced to provide more possibilities for the depiction of features and make the network more flexible. We further design a residual structure network equipped with the proposed LAConv and DYB modules, and apply it to two image fusion tasks. Experiments for pansharpening and hyperspectral image super-resolution (HISR) demonstrate the superiority of our method over other state-of-the-art methods. It is worth mentioning that LAConv can also be competent for other super-resolution tasks with less computation effort. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.11617v1-abstract-full').style.display = 'none'; document.getElementById('2107.11617v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 July, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2106.12470">arXiv:2106.12470</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2106.12470">pdf</a>, <a href="https://arxiv.org/ps/2106.12470">ps</a>, <a href="https://arxiv.org/format/2106.12470">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Bilateral Control of Teleoperators with Closed Architecture and Time-Varying Delay </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Wang%2C+H">Hanlei Wang</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+Y">Yipeng Li</a>, <a href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Tiantian Jiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2106.12470v1-abstract-short" style="display: inline;"> This paper investigates bilateral control of teleoperators with closed architecture and subjected to arbitrary bounded time-varying delay. A prominent challenge for bilateral control of such teleoperators lies in the closed architecture, especially in the context not involving interaction force/torque measurement. 
This yields the long-standing situation that most bilateral control rigorously devel&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.12470v1-abstract-full').style.display = 'inline'; document.getElementById('2106.12470v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2106.12470v1-abstract-full" style="display: none;"> This paper investigates bilateral control of teleoperators with closed architecture and subjected to arbitrary bounded time-varying delay. A prominent challenge for bilateral control of such teleoperators lies in the closed architecture, especially in the context not involving interaction force/torque measurement. This yields the long-standing situation that most bilateral control rigorously developed in the literature is hard to be justified as applied to teleoperators with closed architecture. With a new class of dynamic feedback, we propose kinematic and adaptive dynamic controllers for teleoperators with closed architecture, and we show that the proposed kinematic and dynamic controllers are robust with respect to arbitrary bounded time-varying delay. In addition, by exploiting the input-output properties of an inverted form of the dynamics of robot manipulators with closed architecture, we remove the assumption of uniform exponential stability of a linear time-varying system due to the adaptation to the gains of the inner controller in demonstrating stability of the presented adaptive dynamic control. The application of the proposed approach is illustrated by the experimental results using a Phantom Omni and a UR10 robot. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.12470v1-abstract-full').style.display = 'none'; document.getElementById('2106.12470v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This version is prepared with the consideration of the reviewers&#39; and AE&#39;s comments</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2105.14320">arXiv:2105.14320</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2105.14320">pdf</a>, <a href="https://arxiv.org/format/2105.14320">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TIP.2022.3176220">10.1109/TIP.2022.3176220 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Self-Supervised Nonlinear Transform-Based Tensor Nuclear Norm for Multi-Dimensional Image Recovery </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&amp;query=Luo%2C+Y">Yi-Si Luo</a>, <a href="/search/eess?searchtype=author&amp;query=Zhao%2C+X">Xi-Le Zhao</a>, <a 
href="/search/eess?searchtype=author&amp;query=Jiang%2C+T">Tai-Xiang Jiang</a>, <a href="/search/eess?searchtype=author&amp;query=Chang%2C+Y">Yi Chang</a>, <a href="/search/eess?searchtype=author&amp;query=Ng%2C+M+K">Michael K. Ng</a>, <a href="/search/eess?searchtype=author&amp;query=Li%2C+C">Chao Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2105.14320v1-abstract-short" style="display: inline;"> In this paper, we study multi-dimensional image recovery. Recently, transform-based tensor nuclear norm minimization methods are considered to capture low-rank tensor structures to recover third-order tensors in multi-dimensional image processing applications. The main characteristic of such methods is to perform the linear transform along the third mode of third-order tensors, and then compute te&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2105.14320v1-abstract-full').style.display = 'inline'; document.getElementById('2105.14320v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2105.14320v1-abstract-full" style="display: none;"> In this paper, we study multi-dimensional image recovery. Recently, transform-based tensor nuclear norm minimization methods are considered to capture low-rank tensor structures to recover third-order tensors in multi-dimensional image processing applications. The main characteristic of such methods is to perform the linear transform along the third mode of third-order tensors, and then compute tensor nuclear norm minimization on the transformed tensor so that the underlying low-rank tensors can be recovered. The main aim of this paper is to propose a nonlinear multilayer neural network to learn a nonlinear transform via the observed tensor data under self-supervision. 
The proposed network makes use of low-rank representation of transformed tensors and data-fitting between the observed tensor and the reconstructed tensor to construct the nonlinear transformation. Extensive experimental results on tensor completion, background subtraction, robust tensor completion, and snapshot compressive imaging are presented to demonstrate that the performance of the proposed method is better than that of state-of-the-art methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2105.14320v1-abstract-full').style.display = 'none'; document.getElementById('2105.14320v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 May, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2021. </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Jiang%2C+T&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Jiang%2C+T&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Jiang%2C+T&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> 
<li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" 
target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 
21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10