<!-- Search | arXiv e-print repository -->

<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8"/>
  <meta name="viewport" content="width=device-width, initial-scale=1"/>

  <!-- Favicons, web-app manifest and browser theme colours. -->
  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png">
  <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png">
  <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png">
  <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest">
  <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b">
  <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico">
  <meta name="msapplication-TileColor" content="#b31b1b">
  <meta name="msapplication-config" content="images/icons/browserconfig.xml">
  <meta name="theme-color" content="#b31b1b">

  <title>Search | arXiv e-print repository</title>
  <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script>
  <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" />

  <!--
    MathJax 2 configuration, read by MathJax.js when it loads.
    * ignoreClass / processClass: skip every element by default and typeset
      only those whose class matches "mathjax.*" (titles, abstracts, ...).
    * inlineMath pairs $...$ with \(...\), mirroring displayMath's
      $$...$$ with \[...\]. The second inline pair was previously a
      duplicate of the first ("\$" is just "$" in a JS string), so
      \(...\) inline math was never recognised.
    * processEscapes lets \$ in text stand for a literal dollar sign.
  -->
  <script type="text/x-mathjax-config">
    MathJax.Hub.Config({
      messageStyle: "none",
      extensions: ["tex2jax.js"],
      jax: ["input/TeX", "output/HTML-CSS"],
      tex2jax: {
        inlineMath: [ ['$','$'], ["\\(","\\)"] ],
        displayMath: [ ['$$','$$'], ["\\[","\\]"] ],
        processEscapes: true,
        ignoreClass: '.*',
        processClass: 'mathjax.*'
      },
      TeX: {
        extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"],
        noErrors: {
          inlineDelimiters: ["$","$"],
          multiLine: false,
          style: { "font-size": "normal", "border": "" }
        }
      },
      "HTML-CSS": { availableFonts: ["TeX"] }
    });
  </script>
  <!-- Explicit https: a protocol-relative URL fails when the page is opened from file:// -->
  <script src="https://static.arxiv.org/MathJax-2.7.3/MathJax.js"></script>
  <script
src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script>
  <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" />
  <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" />
  <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script>
  <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script>
  <style> radio#cf-customfield_11400 { display: none; } </style>
</head>
<body>
<header><a href="#main-container" class="is-sr-only">Skip to main content</a>
  <!--
    Attribution bar: Cornell logo link on the left, sponsor acknowledgement on the right.
    The logo image carries no aria-label: an aria-label takes precedence over alt,
    so the former aria-label="logo" made the link announce as "logo" instead of
    "Cornell University".
  -->
  <div class="attribution level is-marginless" role="banner">
    <div class="level-left">
      <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" /></a>
    </div>
    <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div>
  </div>

  <!-- Identity bar: arXiv logo link on the left, compact site search on the right. -->
  <div class="identity level is-marginless">
    <div class="level-left">
      <div class="level-item">
        <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo">
          <!-- alt alone names the image; the redundant aria-label="logo" was dropped here too -->
          <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" alt="arxiv logo" width="85" style="width:85px;"/>
        </a>
      </div>
    </div>
    <div class="search-block level-right">
      <form class="level-item mini-search" method="GET" action="https://arxiv.org/search">
        <div class="field has-addons">
          <div class="control">
            <input class="input is-small" type="text" name="query" placeholder="Search..."
aria-label="Search term or terms" />
            <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p>
          </div>
          <div class="control">
            <div class="select is-small">
              <select name="searchtype" aria-label="Field to search">
                <option value="all" selected="selected">All fields</option>
                <option value="title">Title</option>
                <option value="author">Author</option>
                <option value="abstract">Abstract</option>
                <option value="comments">Comments</option>
                <option value="journal_ref">Journal reference</option>
                <option value="acm_class">ACM classification</option>
                <option value="msc_class">MSC classification</option>
                <option value="report_num">Report number</option>
                <option value="paper_id">arXiv identifier</option>
                <option value="doi">DOI</option>
                <option value="orcid">ORCID</option>
                <option value="author_id">arXiv author ID</option>
                <option value="help">Help pages</option>
                <option value="full_text">Full text</option>
              </select>
            </div>
          </div>
          <!-- Submitted with the query so the request is tagged as coming from the header box. -->
          <input type="hidden" name="source" value="header">
          <button class="button is-small is-cul-darker">Search</button>
        </div>
      </form>
    </div>
  </div>

  <div class="container">
    <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu">
      <a href="https://arxiv.org/login">Login</a>
    </div>
  </div>
</header>
<main class="container" id="main-container">
  <!-- Result summary heading and search-release note. -->
  <div class="level is-marginless">
    <div class="level-left">
      <h1 class="title is-clearfix"> Showing 1–50 of 231 results for author: <span class="mathjax">Li, T</span> </h1>
    </div>
    <div class="level-right is-hidden-mobile">
      <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span>
    </div>
  </div>
  <div class="content">
    <!--
      Archive-scoped search form (submits to /search/eess).
      role="search" exposes the form as a search landmark; the attribute was
      previously spelled aria-role, which is not a defined attribute and was ignored.
    -->
    <form method="GET" action="/search/eess" role="search">
      Searching in archive <strong>eess</strong>.
<a href="/search/?searchtype=author&query=Li%2C+T">Search in all archives.</a>

<!-- Query row: free-text term, field selector and submit button. -->
<div class="field has-addons-tablet">
  <div class="control is-expanded">
    <label for="query" class="hidden-label">Search term or terms</label>
    <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Li, T">
  </div>
  <div class="select control is-medium">
    <label class="is-hidden" for="searchtype">Field</label>
    <select class="is-medium" id="searchtype" name="searchtype">
      <option value="all">All fields</option>
      <option value="title">Title</option>
      <option selected value="author">Author(s)</option>
      <option value="abstract">Abstract</option>
      <option value="comments">Comments</option>
      <option value="journal_ref">Journal reference</option>
      <option value="acm_class">ACM classification</option>
      <option value="msc_class">MSC classification</option>
      <option value="report_num">Report number</option>
      <option value="paper_id">arXiv identifier</option>
      <option value="doi">DOI</option>
      <option value="orcid">ORCID</option>
      <option value="license">License (URI)</option>
      <option value="author_id">arXiv author ID</option>
      <option value="help">Help pages</option>
      <option value="full_text">Full text</option>
    </select>
  </div>
  <div class="control">
    <button class="button is-link is-medium">Search</button>
  </div>
</div>

<!-- Abstract visibility choice, sent as the "abstracts" parameter (show / hide). -->
<div class="field">
  <div class="control is-size-7">
    <label class="radio">
      <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts
    </label>
    <label class="radio">
      <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts
    </label>
  </div>
</div>

<!-- Link to the advanced form, pre-filled with the current term, page size and ordering. -->
<div class="is-clearfix" style="height: 2.5em">
  <div class="is-pulled-right">
    <a href="/search/advanced?terms-0-term=Li%2C+T&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a>
  </div>
</div>

<!-- Current ordering and page size travel with a new search as hidden fields. -->
<input type="hidden" name="order" value="-announced_date_first">
<input type="hidden" name="size" value="50">
</form>
<div class="level breathe-horizontal">
<div
class="level-left">
  <!--
    Page-size / sort-order form. The first child is a display:none copy of the
    current query state (search field, term, abstract visibility) so that
    pressing "Go" resubmits the same search with a new size or order.

    The hidden copies use "paging-" prefixed ids: they previously reused
    id="searchtype", id="query", id="abstracts-0" and id="abstracts-1" from the
    visible search form, and ids must be unique within a document. Only the ids
    (and the matching label "for" values) changed; the name attributes, and
    therefore the submitted parameters, are the same as before.
    NOTE(review): confirm no external script or stylesheet targets the second
    occurrence of the old ids.
  -->
  <form method="GET" action="/search/">
    <div style="display: none;">
      <select id="paging-searchtype" name="searchtype">
        <option value="all">All fields</option>
        <option value="title">Title</option>
        <option selected value="author">Author(s)</option>
        <option value="abstract">Abstract</option>
        <option value="comments">Comments</option>
        <option value="journal_ref">Journal reference</option>
        <option value="acm_class">ACM classification</option>
        <option value="msc_class">MSC classification</option>
        <option value="report_num">Report number</option>
        <option value="paper_id">arXiv identifier</option>
        <option value="doi">DOI</option>
        <option value="orcid">ORCID</option>
        <option value="license">License (URI)</option>
        <option value="author_id">arXiv author ID</option>
        <option value="help">Help pages</option>
        <option value="full_text">Full text</option>
      </select>
      <input id="paging-query" name="query" type="text" value="Li, T">
      <ul id="abstracts"><li><input checked id="paging-abstracts-0" name="abstracts" type="radio" value="show"> <label for="paging-abstracts-0">Show abstracts</label></li><li><input id="paging-abstracts-1" name="abstracts" type="radio" value="hide"> <label for="paging-abstracts-1">Hide abstracts</label></li></ul>
    </div>
    <div class="box field is-grouped is-grouped-multiline level-item">
      <div class="control">
        <span class="select is-small">
          <select id="size" name="size">
            <option value="25">25</option>
            <option selected value="50">50</option>
            <option value="100">100</option>
            <option value="200">200</option>
          </select>
        </span>
        <label for="size">results per page</label>.
</div>
      <div class="control">
        <label for="order">Sort results by</label>
        <span class="select is-small">
          <select id="order" name="order">
            <option selected value="-announced_date_first">Announcement date (newest first)</option>
            <option value="announced_date_first">Announcement date (oldest first)</option>
            <option value="-submitted_date">Submission date (newest first)</option>
            <option value="submitted_date">Submission date (oldest first)</option>
            <option value="">Relevance</option>
          </select>
        </span>
      </div>
      <div class="control">
        <button class="button is-small is-link">Go</button>
      </div>
    </div>
  </form>
</div>
</div>

<!--
  Pagination. The link with class "is-current" is the page being shown, so it
  alone carries aria-current="page" and the label "Page N"; the other links are
  labelled "Goto page N". These were previously inverted: pages 2 to 5 claimed
  aria-current while the real current page (1) did not.
  "Previous" is hidden with is-invisible because this is the first page.
-->
<nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination">
  <a href="" class="pagination-previous is-invisible">Previous </a>
  <a href="/search/?searchtype=author&query=Li%2C+T&start=50" class="pagination-next" >Next </a>
  <ul class="pagination-list">
    <li> <a href="/search/?searchtype=author&query=Li%2C+T&start=0" class="pagination-link is-current" aria-label="Page 1" aria-current="page">1 </a> </li>
    <li> <a href="/search/?searchtype=author&query=Li%2C+T&start=50" class="pagination-link " aria-label="Goto page 2">2 </a> </li>
    <li> <a href="/search/?searchtype=author&query=Li%2C+T&start=100" class="pagination-link " aria-label="Goto page 3">3 </a> </li>
    <li> <a href="/search/?searchtype=author&query=Li%2C+T&start=150" class="pagination-link " aria-label="Goto page 4">4 </a> </li>
    <li> <a href="/search/?searchtype=author&query=Li%2C+T&start=200" class="pagination-link " aria-label="Goto page 5">5 </a> </li>
  </ul>
</nav>

<!-- Result list; numbering starts at 1 for this page. -->
<ol class="breathe-horizontal" start="1">
  <li class="arxiv-result">
    <div class="is-marginless">
      <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06113">arXiv:2411.06113</a> <span> [<a href="https://arxiv.org/pdf/2411.06113">pdf</a>, <a href="https://arxiv.org/format/2411.06113">other</a>] </span> </p>
      <div class="tags is-inline-block">
<span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Behavior-Aware Efficient Detection of Malicious EVs in V2G Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wu%2C+R">Ruixiang Wu</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+X">Xudong Wang</a>, <a href="/search/eess?searchtype=author&query=Li%2C+T">Tongxin Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06113v1-abstract-short" style="display: inline;"> With the rapid development of electric vehicles (EVs) and vehicle-to-grid (V2G) technology, detecting malicious EV drivers is becoming increasingly important for the reliability and efficiency of smart grids. To address this challenge, machine learning (ML) algorithms are employed to predict user behavior and identify patterns of non-cooperation. However, the ML predictions are often untrusted, wh… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06113v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06113v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06113v1-abstract-full" style="display: none;"> With the rapid development of electric vehicles (EVs) and vehicle-to-grid (V2G) technology, detecting malicious EV drivers is becoming increasingly important for the reliability and efficiency of smart grids. To address this challenge, machine learning (ML) algorithms are employed to predict user behavior and identify patterns of non-cooperation. However, the ML predictions are often untrusted, which can significantly degrade the performance of existing algorithms. 
In this paper, we propose a safety-enabled group testing scheme, \ouralg, which combines the efficiency of probabilistic group testing with ML predictions and the robustness of combinatorial group testing. We prove that \ouralg is $O(d)$-consistent and $O(d\log n)$-robust, striking a near-optimal trade-off. Experiments on synthetic data and case studies based on \textsc{ACN-Data}, a real-world EV charging dataset validate the efficacy of \ouralg for efficiently detecting malicious users in V2G systems. Our findings contribute to the growing field of algorithms with predictions and provide insights for incorporating distributional ML advice into algorithmic decision-making in energy and transportation-related systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06113v1-abstract-full').style.display = 'none'; document.getElementById('2411.06113v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06082">arXiv:2411.06082</a> <span> [<a href="https://arxiv.org/pdf/2411.06082">pdf</a>, <a href="https://arxiv.org/format/2411.06082">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Quasi-Newton OMP Approach for Super-Resolution Channel Estimation and Extrapolation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Zeng%2C+Y">Yi Zeng</a>, <a href="/search/eess?searchtype=author&query=Han%2C+M">Mingguang Han</a>, <a href="/search/eess?searchtype=author&query=Li%2C+X">Xiaoguang Li</a>, <a href="/search/eess?searchtype=author&query=Li%2C+T">Tiejun Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06082v1-abstract-short" style="display: inline;"> Channel estimation and extrapolation are fundamental issues in MIMO communication systems. In this paper, we proposed the quasi-Newton orthogonal matching pursuit (QNOMP) approach to overcome these issues with high efficiency while maintaining accuracy. The algorithm consists of two stages on the super-resolution recovery: we first performed a cheap on-grid OMP estimation of channel parameters in… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06082v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06082v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06082v1-abstract-full" style="display: none;"> Channel estimation and extrapolation are fundamental issues in MIMO communication systems. 
In this paper, we proposed the quasi-Newton orthogonal matching pursuit (QNOMP) approach to overcome these issues with high efficiency while maintaining accuracy. The algorithm consists of two stages on the super-resolution recovery: we first performed a cheap on-grid OMP estimation of channel parameters in the sparsity domain (e.g., delay or angle), then an off-grid optimization to achieve the super-resolution. In the off-grid stage, we employed the BFGS quasi-Newton method to jointly estimate the parameters through a multipath model, which improved the speed and accuracy significantly. Furthermore, we derived the optimal extrapolated solution in the linear minimum mean squared estimator criterion, revealed its connection with Slepian basis, and presented a practical algorithm to realize the extrapolation based on the QNOMP results. Special treatment utilizing the block sparsity nature of the considered channels was also proposed. Numerical experiments on the simulated models and CDL-C channels demonstrated the high performance and low computational complexity of QNOMP. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06082v1-abstract-full').style.display = 'none'; document.getElementById('2411.06082v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.01918">arXiv:2411.01918</a> <span> [<a href="https://arxiv.org/pdf/2411.01918">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Preemptive Holistic Collaborative System and Its Application in Road Transportation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Peng%2C+T">Ting Peng</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Y">Yuan Li</a>, <a href="/search/eess?searchtype=author&query=Li%2C+T">Tao Li</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+X">Xiaoxue Xu</a>, <a href="/search/eess?searchtype=author&query=Dong%2C+X">Xiang Dong</a>, <a href="/search/eess?searchtype=author&query=Cai%2C+Y">Yincai Cai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.01918v1-abstract-short" style="display: inline;"> Numerous real-world systems, including manufacturing processes, supply chains, and robotic systems, involve multiple independent entities with diverse objectives. The potential for conflicts arises from the inability of these entities to accurately predict and anticipate each other's actions. To address this challenge, we propose the Preemptive Holistic Collaborative System (PHCS) framework. 
By en… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01918v1-abstract-full').style.display = 'inline'; document.getElementById('2411.01918v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.01918v1-abstract-full" style="display: none;"> Numerous real-world systems, including manufacturing processes, supply chains, and robotic systems, involve multiple independent entities with diverse objectives. The potential for conflicts arises from the inability of these entities to accurately predict and anticipate each other's actions. To address this challenge, we propose the Preemptive Holistic Collaborative System (PHCS) framework. By enabling information sharing and collaborative planning among independent entities, the PHCS facilitates the preemptive resolution of potential conflicts. We apply the PHCS framework to the specific context of road transportation, resulting in the Preemptive Holistic Collaborative Road Transportation System (PHCRTS). This system leverages shared driving intentions and pre-planned trajectories to optimize traffic flow and enhance safety. Simulation experiments in a two-lane merging scenario demonstrate the effectiveness of PHCRTS, reducing vehicle time delays by 90%, increasing traffic capacity by 300%, and eliminating accidents. The PHCS framework offers a promising approach to optimize the performance and safety of complex systems with multiple independent entities. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01918v1-abstract-full').style.display = 'none'; document.getElementById('2411.01918v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.01194">arXiv:2411.01194</a> <span> [<a href="https://arxiv.org/pdf/2411.01194">pdf</a>, <a href="https://arxiv.org/format/2411.01194">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Relay Satellite Assisted LEO Constellation NOMA Communication System </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Zhang%2C+X">Xuyang Zhang</a>, <a href="/search/eess?searchtype=author&query=Yue%2C+X">Xinwei Yue</a>, <a href="/search/eess?searchtype=author&query=Han%2C+Z">Zhihao Han</a>, <a href="/search/eess?searchtype=author&query=Li%2C+T">Tian Li</a>, <a href="/search/eess?searchtype=author&query=Shen%2C+X">Xia Shen</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Y">Yafei Wang</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+R">Rongke Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.01194v1-abstract-short" style="display: inline;"> This paper proposes a relay satellite assisted low earth orbit (LEO) constellation non-orthogonal multiple access combined beamforming (R-NOMA-BF) communication system, where multiple antenna LEO satellites deliver information to ground non-orthogonal users. 
To measure the service quality, we formulate a resource allocation problem to minimize the second-order difference between the achievable cap… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01194v1-abstract-full').style.display = 'inline'; document.getElementById('2411.01194v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.01194v1-abstract-full" style="display: none;"> This paper proposes a relay satellite assisted low earth orbit (LEO) constellation non-orthogonal multiple access combined beamforming (R-NOMA-BF) communication system, where multiple antenna LEO satellites deliver information to ground non-orthogonal users. To measure the service quality, we formulate a resource allocation problem to minimize the second-order difference between the achievable capacity and user request traffic. Based on the above problem, joint optimization for LEO satellite-cell assignment factor, NOMA power and BF vector is taken into account. The optimization variables are analyzed with respect to feasibility and non-convexity. Additionally, we provide a pair of effective algorithms, i.e., doppler shift LEO satellite-cell assisted monotonic programming of NOMA with BF vector (D-mNOMA-BF) and ant colony pathfinding based NOMA exponential cone programming with BF vector (A-eNOMA-BF). Two compromise algorithms regarding the above are also presented. Numerical results show that: 1) D-mNOMA-BF and A-eNOMA-BF algorithms are superior to that of orthogonal multiple access based BF (OMA-BF) and polarization multiplexing schemes; 2) With the increasing number of antennas and single satellite power, R-NOMA-BF system is able to expand users satisfaction; and 3) By comparing various imperfect successive interference cancellation, the performance of A-mNOMA-BF algorithm exceeds D-mNOMA-BF. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01194v1-abstract-full').style.display = 'none'; document.getElementById('2411.01194v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.01156">arXiv:2411.01156</a> <span> [<a href="https://arxiv.org/pdf/2411.01156">pdf</a>, <a href="https://arxiv.org/format/2411.01156">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Fish-Speech: Leveraging Large Language Models for Advanced Multilingual Text-to-Speech Synthesis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Liao%2C+S">Shijia Liao</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Y">Yuxuan Wang</a>, <a href="/search/eess?searchtype=author&query=Li%2C+T">Tianyu Li</a>, <a href="/search/eess?searchtype=author&query=Cheng%2C+Y">Yifan Cheng</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+R">Ruoyi Zhang</a>, <a href="/search/eess?searchtype=author&query=Zhou%2C+R">Rongzhi Zhou</a>, <a href="/search/eess?searchtype=author&query=Xing%2C+Y">Yijin Xing</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.01156v2-abstract-short" style="display: inline;"> Text-to-Speech (TTS) systems face ongoing challenges 
in processing complex linguistic features, handling polyphonic expressions, and producing natural-sounding multilingual speech - capabilities that are crucial for future AI applications. In this paper, we present Fish-Speech, a novel framework that implements a serial fast-slow Dual Autoregressive (Dual-AR) architecture to enhance the stability… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01156v2-abstract-full').style.display = 'inline'; document.getElementById('2411.01156v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.01156v2-abstract-full" style="display: none;"> Text-to-Speech (TTS) systems face ongoing challenges in processing complex linguistic features, handling polyphonic expressions, and producing natural-sounding multilingual speech - capabilities that are crucial for future AI applications. In this paper, we present Fish-Speech, a novel framework that implements a serial fast-slow Dual Autoregressive (Dual-AR) architecture to enhance the stability of Grouped Finite Scalar Vector Quantization (GFSQ) in sequence generation tasks. This architecture improves codebook processing efficiency while maintaining high-fidelity outputs, making it particularly effective for AI interactions and voice cloning. Fish-Speech leverages Large Language Models (LLMs) for linguistic feature extraction, eliminating the need for traditional grapheme-to-phoneme (G2P) conversion and thereby streamlining the synthesis pipeline and enhancing multilingual support. Additionally, we developed FF-GAN through GFSQ to achieve superior compression ratios and near 100\% codebook utilization. Our approach addresses key limitations of current TTS systems while providing a foundation for more sophisticated, context-aware speech synthesis. 
Experimental results show that Fish-Speech significantly outperforms baseline models in handling complex linguistic scenarios and voice cloning tasks, demonstrating its potential to advance TTS technology in AI applications. The implementation is open source at \href{https://github.com/fishaudio/fish-speech}{https://github.com/fishaudio/fish-speech}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01156v2-abstract-full').style.display = 'none'; document.getElementById('2411.01156v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.22672">arXiv:2410.22672</a> <span> [<a href="https://arxiv.org/pdf/2410.22672">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> IM-GIV: an effective integrity monitoring scheme for tightly-coupled GNSS/INS/Vision integration based on factor graph optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Tian%2C+Y">Yunong Tian</a>, <a href="/search/eess?searchtype=author&query=Li%2C+T">Tuan Li</a>, <a href="/search/eess?searchtype=author&query=Jiang%2C+H">Haitao Jiang</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Z">Zhipeng Wang</a>, <a href="/search/eess?searchtype=author&query=Shi%2C+C">Chuang Shi</a> </p> <p 
class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.22672v1-abstract-short" style="display: inline;"> Global Navigation Satellite System/Inertial Navigation System (GNSS/INS)/Vision integration based on factor graph optimization (FGO) has recently attracted extensive attention in navigation and robotics community. Integrity monitoring (IM) capability is required when FGO-based integrated navigation system is used for safety-critical applications. However, traditional researches on IM of integrated… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22672v1-abstract-full').style.display = 'inline'; document.getElementById('2410.22672v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.22672v1-abstract-full" style="display: none;"> Global Navigation Satellite System/Inertial Navigation System (GNSS/INS)/Vision integration based on factor graph optimization (FGO) has recently attracted extensive attention in navigation and robotics community. Integrity monitoring (IM) capability is required when FGO-based integrated navigation system is used for safety-critical applications. However, traditional researches on IM of integrated navigation system are mostly based on Kalman filter. It is urgent to develop effective IM scheme for FGO-based GNSS/INS/Vision integration. In this contribution, the position error bounding formula to ensure the integrity of the GNSS/INS/Vision integration based on FGO is designed and validated for the first time. It can be calculated by the linearized equations from the residuals of GNSS pseudo-range, IMU pre-integration and visual measurements. The specific position error bounding is given in the case of GNSS, INS and visual measurement faults. 
Field experiments were conducted to evaluate and validate the performance of the proposed position error bounding. Experimental results demonstrate that the proposed position error bounding for the GNSS/INS/Vision integration based on FGO can correctly fit the position error against different fault modes, and the availability of integrity in six fault modes is 100% after correct and timely fault exclusion. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22672v1-abstract-full').style.display = 'none'; document.getElementById('2410.22672v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.19813">arXiv:2410.19813</a> <span> [<a href="https://arxiv.org/pdf/2410.19813">pdf</a>, <a href="https://arxiv.org/format/2410.19813">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Threshold-Based Automated Pest Detection System for Sustainable Agriculture </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Li%2C+T">Tianle Li</a>, <a href="/search/eess?searchtype=author&query=Shu%2C+J">Jia Shu</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Q">Qinghong Chen</a>, <a href="/search/eess?searchtype=author&query=Abrar%2C+M+M">Murad Mehrab Abrar</a>, <a href="/search/eess?searchtype=author&query=Raiti%2C+J">John Raiti</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span 
class="abstract-short has-text-grey-dark mathjax" id="2410.19813v1-abstract-short" style="display: inline;"> This paper presents a threshold-based automated pea weevil detection system, developed as part of the Microsoft FarmVibes project. Based on Internet-of-Things (IoT) and computer vision, the system is designed to monitor and manage pea weevil populations in agricultural settings, with the goal of enhancing crop production and promoting sustainable farming practices. Unlike the machine learning-base… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19813v1-abstract-full').style.display = 'inline'; document.getElementById('2410.19813v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.19813v1-abstract-full" style="display: none;"> This paper presents a threshold-based automated pea weevil detection system, developed as part of the Microsoft FarmVibes project. Based on Internet-of-Things (IoT) and computer vision, the system is designed to monitor and manage pea weevil populations in agricultural settings, with the goal of enhancing crop production and promoting sustainable farming practices. Unlike the machine learning-based approaches, our detection approach relies on binary grayscale thresholding and contour detection techniques determined by the pea weevil sizes. We detail the design of the product, the system architecture, the integration of hardware and software components, and the overall technology strategy. Our test results demonstrate significant effectiveness in weevil management and offer promising scalability for deployment in resource-constrained environments. In addition, the software has been open-sourced for the global research community. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19813v1-abstract-full').style.display = 'none'; document.getElementById('2410.19813v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted for publication at the 7th IEEE International Conference on Internet of Things and Intelligence System (IOTAIS 2024)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.13720">arXiv:2410.13720</a> <span> [<a href="https://arxiv.org/pdf/2410.13720">pdf</a>, <a href="https://arxiv.org/format/2410.13720">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Movie Gen: A Cast of Media Foundation Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Polyak%2C+A">Adam Polyak</a>, <a href="/search/eess?searchtype=author&query=Zohar%2C+A">Amit Zohar</a>, <a href="/search/eess?searchtype=author&query=Brown%2C+A">Andrew Brown</a>, <a href="/search/eess?searchtype=author&query=Tjandra%2C+A">Andros Tjandra</a>, <a 
href="/search/eess?searchtype=author&query=Sinha%2C+A">Animesh Sinha</a>, <a href="/search/eess?searchtype=author&query=Lee%2C+A">Ann Lee</a>, <a href="/search/eess?searchtype=author&query=Vyas%2C+A">Apoorv Vyas</a>, <a href="/search/eess?searchtype=author&query=Shi%2C+B">Bowen Shi</a>, <a href="/search/eess?searchtype=author&query=Ma%2C+C">Chih-Yao Ma</a>, <a href="/search/eess?searchtype=author&query=Chuang%2C+C">Ching-Yao Chuang</a>, <a href="/search/eess?searchtype=author&query=Yan%2C+D">David Yan</a>, <a href="/search/eess?searchtype=author&query=Choudhary%2C+D">Dhruv Choudhary</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+D">Dingkang Wang</a>, <a href="/search/eess?searchtype=author&query=Sethi%2C+G">Geet Sethi</a>, <a href="/search/eess?searchtype=author&query=Pang%2C+G">Guan Pang</a>, <a href="/search/eess?searchtype=author&query=Ma%2C+H">Haoyu Ma</a>, <a href="/search/eess?searchtype=author&query=Misra%2C+I">Ishan Misra</a>, <a href="/search/eess?searchtype=author&query=Hou%2C+J">Ji Hou</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+J">Jialiang Wang</a>, <a href="/search/eess?searchtype=author&query=Jagadeesh%2C+K">Kiran Jagadeesh</a>, <a href="/search/eess?searchtype=author&query=Li%2C+K">Kunpeng Li</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+L">Luxin Zhang</a>, <a href="/search/eess?searchtype=author&query=Singh%2C+M">Mannat Singh</a>, <a href="/search/eess?searchtype=author&query=Williamson%2C+M">Mary Williamson</a>, <a href="/search/eess?searchtype=author&query=Le%2C+M">Matt Le</a> , et al. (63 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.13720v1-abstract-short" style="display: inline;"> We present Movie Gen, a cast of foundation models that generates high-quality, 1080p HD videos with different aspect ratios and synchronized audio. 
We also show additional capabilities such as precise instruction-based video editing and generation of personalized videos based on a user's image. Our models set a new state-of-the-art on multiple tasks: text-to-video synthesis, video personalization,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.13720v1-abstract-full').style.display = 'inline'; document.getElementById('2410.13720v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.13720v1-abstract-full" style="display: none;"> We present Movie Gen, a cast of foundation models that generates high-quality, 1080p HD videos with different aspect ratios and synchronized audio. We also show additional capabilities such as precise instruction-based video editing and generation of personalized videos based on a user's image. Our models set a new state-of-the-art on multiple tasks: text-to-video synthesis, video personalization, video editing, video-to-audio generation, and text-to-audio generation. Our largest video generation model is a 30B parameter transformer trained with a maximum context length of 73K video tokens, corresponding to a generated video of 16 seconds at 16 frames-per-second. We show multiple technical innovations and simplifications on the architecture, latent spaces, training objectives and recipes, data curation, evaluation protocols, parallelization techniques, and inference optimizations that allow us to reap the benefits of scaling pre-training data, model size, and training compute for training large scale media generation models. We hope this paper helps the research community to accelerate progress and innovation in media generation models. All videos from this paper are available at https://go.fb.me/MovieGenResearchVideos. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.13720v1-abstract-full').style.display = 'none'; document.getElementById('2410.13720v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.12811">arXiv:2410.12811</a> <span> [<a href="https://arxiv.org/pdf/2410.12811">pdf</a>, <a href="https://arxiv.org/format/2410.12811">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Decoding Emotions: Unveiling Facial Expressions through Acoustic Sensing with Contrastive Attention </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wang%2C+G">Guangjing Wang</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+J">Juexing Wang</a>, <a href="/search/eess?searchtype=author&query=Zhou%2C+C">Ce Zhou</a>, <a href="/search/eess?searchtype=author&query=Ding%2C+W">Weikang Ding</a>, <a href="/search/eess?searchtype=author&query=Zeng%2C+H">Huacheng Zeng</a>, <a href="/search/eess?searchtype=author&query=Li%2C+T">Tianxing Li</a>, <a href="/search/eess?searchtype=author&query=Yan%2C+Q">Qiben Yan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short 
has-text-grey-dark mathjax" id="2410.12811v1-abstract-short" style="display: inline;"> Expression recognition holds great promise for applications such as content recommendation and mental healthcare by accurately detecting users' emotional states. Traditional methods often rely on cameras or wearable sensors, which raise privacy concerns and add extra device burdens. In addition, existing acoustic-based methods struggle to maintain satisfactory performance when there is a distribut… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.12811v1-abstract-full').style.display = 'inline'; document.getElementById('2410.12811v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.12811v1-abstract-full" style="display: none;"> Expression recognition holds great promise for applications such as content recommendation and mental healthcare by accurately detecting users' emotional states. Traditional methods often rely on cameras or wearable sensors, which raise privacy concerns and add extra device burdens. In addition, existing acoustic-based methods struggle to maintain satisfactory performance when there is a distribution shift between the training dataset and the inference dataset. In this paper, we introduce FacER+, an active acoustic facial expression recognition system, which eliminates the requirement for external microphone arrays. FacER+ extracts facial expression features by analyzing the echoes of near-ultrasound signals emitted between the 3D facial contour and the earpiece speaker on a smartphone. This approach not only reduces background noise but also enables the identification of different expressions from various users with minimal training data. We develop a contrastive external attention-based model to consistently learn expression features across different users, reducing the distribution differences. 
Extensive experiments involving 20 volunteers, both with and without masks, demonstrate that FacER+ can accurately recognize six common facial expressions with over 90% accuracy in diverse, user-independent real-life scenarios, surpassing the performance of the leading acoustic sensing methods by 10%. FacER+ offers a robust and practical solution for facial expression recognition. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.12811v1-abstract-full').style.display = 'none'; document.getElementById('2410.12811v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The extended version of the 2023 IEEE INFOCOM conference paper</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.12100">arXiv:2410.12100</a> <span> [<a href="https://arxiv.org/pdf/2410.12100">pdf</a>, <a href="https://arxiv.org/format/2410.12100">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Enhancing IoT Communication and Localization via Smarter Antenna </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Li%2C+T">Tianxiang Li</a>, <a href="/search/eess?searchtype=author&query=Lu%2C+H">Haofan Lu</a>, <a 
href="/search/eess?searchtype=author&query=Abari%2C+O">Omid Abari</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.12100v2-abstract-short" style="display: inline;"> The convergence of sensing and communication functionalities is poised to become a pivotal feature of the sixth-generation (6G) wireless networks. This vision represents a paradigm shift in wireless network design, moving beyond mere communication to a holistic integration of sensing and communication capabilities, thereby further narrowing the gap between the physical and digital worlds. While In… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.12100v2-abstract-full').style.display = 'inline'; document.getElementById('2410.12100v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.12100v2-abstract-full" style="display: none;"> The convergence of sensing and communication functionalities is poised to become a pivotal feature of the sixth-generation (6G) wireless networks. This vision represents a paradigm shift in wireless network design, moving beyond mere communication to a holistic integration of sensing and communication capabilities, thereby further narrowing the gap between the physical and digital worlds. While Internet of Things (IoT) devices are integral to future wireless networks, their current capabilities in sensing and communication are constrained by their power and resource limitations. On one hand, their restricted power budget limits their transmission power, leading to reduced communication range and data rates. On the other hand, their limited hardware and processing abilities hinder the adoption of sophisticated sensing technologies, such as direction finding and localization. 
In this work, we introduce Wi-Pro, a system which seamlessly integrates today's WiFi protocol with smart antenna design to enhance the communication and sensing capabilities of existing IoT devices. This plug-and-play system can be easily installed by replacing the IoT device's antenna. Wi-Pro seamlessly integrates smart antenna hardware with current WiFi protocols, utilizing their inherent features to not only enhance communication but also to enable precise localization on low-cost IoT devices. Our evaluation results demonstrate that Wi-Pro achieves up to 150% data rate improvement, up to five times range improvement, accurate direction finding, and localization on single-chain IoT devices. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.12100v2-abstract-full').style.display = 'none'; document.getElementById('2410.12100v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This work has been submitted to the IEEE IoT Journal for possible publication</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.14676">arXiv:2409.14676</a> <span> [<a href="https://arxiv.org/pdf/2409.14676">pdf</a>, <a href="https://arxiv.org/format/2409.14676">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> TransUKAN: Computing-Efficient Hybrid KAN-Transformer for Enhanced Medical Image Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wu%2C+Y">Yanlin Wu</a>, <a href="/search/eess?searchtype=author&query=Li%2C+T">Tao Li</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Z">Zhihong Wang</a>, <a href="/search/eess?searchtype=author&query=Kang%2C+H">Hong Kang</a>, <a href="/search/eess?searchtype=author&query=He%2C+A">Along He</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.14676v2-abstract-short" style="display: inline;"> U-Net is currently the most widely used architecture for medical image segmentation. Benefiting from its unique encoder-decoder architecture and skip connections, it can effectively extract features from input images to segment target regions. 
The commonly used U-Net is typically based on convolutional operations or Transformers, modeling the dependencies between local or global information to acc… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.14676v2-abstract-full').style.display = 'inline'; document.getElementById('2409.14676v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.14676v2-abstract-full" style="display: none;"> U-Net is currently the most widely used architecture for medical image segmentation. Benefiting from its unique encoder-decoder architecture and skip connections, it can effectively extract features from input images to segment target regions. The commonly used U-Net is typically based on convolutional operations or Transformers, modeling the dependencies between local or global information to accomplish medical image analysis tasks. However, convolutional layers, fully connected layers, and attention mechanisms used in this process introduce a significant number of parameters, often requiring the stacking of network layers to model complex nonlinear relationships, which can impact the training process. To address these issues, we propose TransUKAN. Specifically, we have improved the KAN to reduce memory usage and computational load. On this basis, we explored an effective combination of KAN, Transformer, and U-Net structures. This approach enhances the model's capability to capture nonlinear relationships by introducing only a small number of additional parameters and compensates for the Transformer structure's deficiency in local information extraction. We validated TransUKAN on multiple medical image segmentation tasks. Experimental results demonstrate that TransUKAN achieves excellent performance with significantly reduced parameters. The code will be available at https://github.com/wuyanlin-wyl/TransUKAN. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.14676v2-abstract-full').style.display = 'none'; document.getElementById('2409.14676v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.14340">arXiv:2409.14340</a> <span> [<a href="https://arxiv.org/pdf/2409.14340">pdf</a>, <a href="https://arxiv.org/format/2409.14340">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Self-Supervised Audio-Visual Soundscape Stylization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Li%2C+T">Tingle Li</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+R">Renhao Wang</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+P">Po-Yao Huang</a>, <a href="/search/eess?searchtype=author&query=Owens%2C+A">Andrew Owens</a>, <a href="/search/eess?searchtype=author&query=Anumanchipalli%2C+G">Gopala Anumanchipalli</a> </p> <p 
class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.14340v1-abstract-short" style="display: inline;"> Speech sounds convey a great deal of information about the scenes, resulting in a variety of effects ranging from reverberation to additional ambient sounds. In this paper, we manipulate input speech to sound as though it was recorded within a different scene, given an audio-visual conditional example recorded from that scene. Our model learns through self-supervision, taking advantage of the fact… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.14340v1-abstract-full').style.display = 'inline'; document.getElementById('2409.14340v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.14340v1-abstract-full" style="display: none;"> Speech sounds convey a great deal of information about the scenes, resulting in a variety of effects ranging from reverberation to additional ambient sounds. In this paper, we manipulate input speech to sound as though it was recorded within a different scene, given an audio-visual conditional example recorded from that scene. Our model learns through self-supervision, taking advantage of the fact that natural video contains recurring sound events and textures. We extract an audio clip from a video and apply speech enhancement. We then train a latent diffusion model to recover the original speech, using another audio-visual clip taken from elsewhere in the video as a conditional hint. Through this process, the model learns to transfer the conditional example's sound properties to the input speech. We show that our model can be successfully trained using unlabeled, in-the-wild videos, and that an additional visual signal can improve its sound prediction abilities. 
Please see our project webpage for video results: https://tinglok.netlify.app/files/avsoundscape/ <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.14340v1-abstract-full').style.display = 'none'; document.getElementById('2409.14340v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ECCV 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.10876">arXiv:2409.10876</a> <span> [<a href="https://arxiv.org/pdf/2409.10876">pdf</a>, <a href="https://arxiv.org/format/2409.10876">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Neural Fields for Adaptive Photoacoustic Computed Tomography </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Li%2C+T">Tianao Li</a>, <a href="/search/eess?searchtype=author&query=Cui%2C+M">Manxiu Cui</a>, <a href="/search/eess?searchtype=author&query=Ma%2C+C">Cheng Ma</a>, <a href="/search/eess?searchtype=author&query=Alexander%2C+E">Emma Alexander</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span 
class="abstract-short has-text-grey-dark mathjax" id="2409.10876v2-abstract-short" style="display: inline;"> Photoacoustic computed tomography (PACT) is a non-invasive imaging modality with wide medical applications. Conventional PACT image reconstruction algorithms suffer from wavefront distortion caused by the heterogeneous speed of sound (SOS) in tissue, which leads to image degradation. Accounting for these effects improves image quality, but measuring the SOS distribution is experimentally expensive… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.10876v2-abstract-full').style.display = 'inline'; document.getElementById('2409.10876v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.10876v2-abstract-full" style="display: none;"> Photoacoustic computed tomography (PACT) is a non-invasive imaging modality with wide medical applications. Conventional PACT image reconstruction algorithms suffer from wavefront distortion caused by the heterogeneous speed of sound (SOS) in tissue, which leads to image degradation. Accounting for these effects improves image quality, but measuring the SOS distribution is experimentally expensive. An alternative approach is to perform joint reconstruction of the initial pressure image and SOS using only the PA signals. Existing joint reconstruction methods come with limitations: high computational cost, inability to directly recover SOS, and reliance on inaccurate simplifying assumptions. Implicit neural representation, or neural fields, is an emerging technique in computer vision to learn an efficient and continuous representation of physical fields with a coordinate-based neural network. In this work, we introduce NF-APACT, an efficient self-supervised framework utilizing neural fields to estimate the SOS in service of an accurate and robust multi-channel deconvolution. 
Our method removes SOS aberrations an order of magnitude faster and more accurately than existing methods. We demonstrate the success of our method on a novel numerical phantom as well as an experimentally collected phantom and in vivo data. Our code and numerical phantom are available at https://github.com/Lukeli0425/NF-APACT. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.10876v2-abstract-full').style.display = 'none'; document.getElementById('2409.10876v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.01957">arXiv:2409.01957</a> <span> [<a href="https://arxiv.org/pdf/2409.01957">pdf</a>, <a href="https://arxiv.org/ps/2409.01957">ps</a>, <a href="https://arxiv.org/format/2409.01957">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Power Control and Random Serving Mode Allocation for CJT-NCJT Hybrid Mode Enabled Cell-Free Massive MIMO With Limited Fronthauls </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Zhang%2C+H">Hangyu Zhang</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+R">Rui Zhang</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Y">Yongzhao Li</a>, <a 
href="/search/eess?searchtype=author&query=Ruan%2C+Y">Yuhan Ruan</a>, <a href="/search/eess?searchtype=author&query=Li%2C+T">Tao Li</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+D">Dong Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.01957v1-abstract-short" style="display: inline;"> With a great potential of improving the service fairness and quality for user equipments (UEs), cell-free massive multiple-input multiple-output (mMIMO) has been regarded as an emerging candidate for 6G network architectures. Under ideal assumptions, the coherent joint transmission (CJT) serving mode has been considered as an optimal option for cell-free mMIMO systems, since it can achieve coheren… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.01957v1-abstract-full').style.display = 'inline'; document.getElementById('2409.01957v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.01957v1-abstract-full" style="display: none;"> With a great potential of improving the service fairness and quality for user equipments (UEs), cell-free massive multiple-input multiple-output (mMIMO) has been regarded as an emerging candidate for 6G network architectures. Under ideal assumptions, the coherent joint transmission (CJT) serving mode has been considered as an optimal option for cell-free mMIMO systems, since it can achieve coherent cooperation gain among the access points. However, when considering the limited fronthaul constraint in practice, the non-coherent joint transmission (NCJT) serving mode is likely to outperform CJT, since the former requires much lower fronthaul resources. 
In other words, whether a single serving mode (CJT or NCJT) performs well or poorly depends on the fronthaul capacity, and no single transmission mode can perfectly adapt to the capacity-limited fronthaul. To explore the performance potential of the cell-free mMIMO system with limited fronthauls by harnessing the merits of CJT and NCJT, we propose a CJT-NCJT hybrid serving mode framework, in which UEs are allocated to operate on CJT or NCJT serving mode. To improve the sum-rate of the system with low complexity, we first propose a probability-based random serving mode allocation scheme. With a given serving mode, a successive convex approximation-based power allocation algorithm is proposed to maximize the system's sum-rate. Simulation results demonstrate the superiority of the proposed scheme. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.01957v1-abstract-full').style.display = 'none'; document.getElementById('2409.01957v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 2 figures, accepted by GLOBECOM 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.00799">arXiv:2409.00799</a> <span> [<a href="https://arxiv.org/pdf/2409.00799">pdf</a>, <a href="https://arxiv.org/format/2409.00799">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> DMRA: An Adaptive Line Spectrum Estimation Method through Dynamical Multi-Resolution of Atoms </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Han%2C+M">Mingguang Han</a>, <a href="/search/eess?searchtype=author&query=Zeng%2C+Y">Yi Zeng</a>, <a href="/search/eess?searchtype=author&query=Li%2C+X">Xiaoguang Li</a>, <a href="/search/eess?searchtype=author&query=Li%2C+T">Tiejun Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.00799v1-abstract-short" style="display: inline;"> We proposed a novel dense line spectrum super-resolution algorithm, the DMRA, that leverages dynamical multi-resolution of atoms technique to address the limitation of traditional compressed sensing methods when handling dense point-source signals. 
The algorithm utilizes a smooth $\tanh$ relaxation function to replace the $\ell_0$ norm, promoting sparsity and jointly estimating the frequency atoms… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.00799v1-abstract-full').style.display = 'inline'; document.getElementById('2409.00799v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.00799v1-abstract-full" style="display: none;"> We proposed a novel dense line spectrum super-resolution algorithm, the DMRA, that leverages dynamical multi-resolution of atoms technique to address the limitation of traditional compressed sensing methods when handling dense point-source signals. The algorithm utilizes a smooth $\tanh$ relaxation function to replace the $\ell_0$ norm, promoting sparsity and jointly estimating the frequency atoms and complex gains. To reduce computational complexity and improve frequency estimation accuracy, a two-stage strategy was further introduced to dynamically adjust the number of the optimized degrees of freedom. The strategy first increases candidate frequencies through local refinement, then applies a sparse selector to eliminate insignificant frequencies, thereby adaptively adjusting the degrees of freedom to improve estimation accuracy. Theoretical analysis was provided to validate the proposed method for multi-parameter estimations. Computational results demonstrated that this algorithm achieves good super-resolution performance in various practical scenarios and outperforms the state-of-the-art methods in terms of frequency estimation accuracy and computational efficiency. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.00799v1-abstract-full').style.display = 'none'; document.getElementById('2409.00799v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.12914">arXiv:2408.12914</a> <span> [<a href="https://arxiv.org/pdf/2408.12914">pdf</a>, <a href="https://arxiv.org/ps/2408.12914">ps</a>, <a href="https://arxiv.org/format/2408.12914">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TVT.2024.3497009">10.1109/TVT.2024.3497009 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> A Recursion-Based SNR Determination Method for Short Packet Transmission: Analysis and Applications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Yin%2C+C">Chengzhe Yin</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+R">Rui Zhang</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Y">Yongzhao Li</a>, <a href="/search/eess?searchtype=author&query=Ruan%2C+Y">Yuhan Ruan</a>, <a href="/search/eess?searchtype=author&query=Li%2C+T">Tao Li</a>, <a href="/search/eess?searchtype=author&query=Lu%2C+J">Jiaheng Lu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.12914v1-abstract-short" style="display: inline;"> The short packet transmission (SPT) has gained much attention in recent years. In SPT, the most significant characteristic is that the finite blocklength code (FBC) is adopted. With FBC, the signal-to-noise ratio (SNR) cannot be expressed as an explicit function with respect to the other transmission parameters. This raises the following two problems for the resource allocation in SPTs: (i) The ex… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.12914v1-abstract-full').style.display = 'inline'; document.getElementById('2408.12914v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.12914v1-abstract-full" style="display: none;"> The short packet transmission (SPT) has gained much attention in recent years. In SPT, the most significant characteristic is that the finite blocklength code (FBC) is adopted. With FBC, the signal-to-noise ratio (SNR) cannot be expressed as an explicit function with respect to the other transmission parameters. This raises the following two problems for the resource allocation in SPTs: (i) The exact value of the SNR is hard to determine, and (ii) The property of SNR w.r.t. the other parameters is hard to analyze, which hinders the efficient optimization of them. To simultaneously tackle these problems, we have developed a recursion method in our prior work. To emphasize the significance of this method, we further analyze the convergence rate of the recursion method and investigate the property of the recursion function in this paper. Specifically, we first analyze the convergence rate of the recursion method, which indicates it can determine the SNR with low complexity. 
Then, we analyze the property of the recursion function, which facilitates the optimization of the other parameters during the recursion. Finally, we also enumerate some applications for the recursion method. Simulation results indicate that the recursion method converges faster than the other SNR determination methods. Besides, the results also show that the recursion-based methods can almost achieve the optimal solution of the application cases. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.12914v1-abstract-full').style.display = 'none'; document.getElementById('2408.12914v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Transactions on Vehicular Technology, Early access (2024) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.11882">arXiv:2408.11882</a> <span> [<a href="https://arxiv.org/pdf/2408.11882">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Neurons and Cognition">q-bio.NC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" 
href="https://doi.org/10.21437/Interspeech.2024-524">10.21437/Interspeech.2024-524 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Prosody of speech production in latent post-stroke aphasia </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Zhang%2C+C">Cong Zhang</a>, <a href="/search/eess?searchtype=author&query=Li%2C+T">Tong Li</a>, <a href="/search/eess?searchtype=author&query=DeDe%2C+G">Gayle DeDe</a>, <a href="/search/eess?searchtype=author&query=Salis%2C+C">Christos Salis</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.11882v2-abstract-short" style="display: inline;"> This study explores prosodic production in latent aphasia, a mild form of aphasia associated with left-hemisphere brain damage (e.g. stroke). Unlike prior research on moderate to severe aphasia, we investigated latent aphasia, which can seem to have very similar speech production with neurotypical speech. We analysed the f0, intensity and duration of utterance-initial and utterance-final words of… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.11882v2-abstract-full').style.display = 'inline'; document.getElementById('2408.11882v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.11882v2-abstract-full" style="display: none;"> This study explores prosodic production in latent aphasia, a mild form of aphasia associated with left-hemisphere brain damage (e.g. stroke). Unlike prior research on moderate to severe aphasia, we investigated latent aphasia, which can seem to have very similar speech production with neurotypical speech. 
We analysed the f0, intensity and duration of utterance-initial and utterance-final words of ten speakers with latent aphasia and ten matching controls. Regression models were fitted to improve our understanding of this understudied type of very mild aphasia. The results highlighted varying degrees of differences in all three prosodic measures between groups. We also investigated the diagnostic classification of latent aphasia versus neurotypical control using random forest, aiming to build a fast and reliable tool to assist with the identification of latent aphasia. The random forest analysis also reinforced the significance of prosodic features in distinguishing latent aphasia. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.11882v2-abstract-full').style.display = 'none'; document.getElementById('2408.11882v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Interspeech 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.08121">arXiv:2408.08121</a> <span> [<a href="https://arxiv.org/pdf/2408.08121">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Optimizing Highway Ramp Merge Safety and Efficiency via Spatio-Temporal Cooperative Control and Vehicle-Road Coordination </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Peng%2C+T">Ting Peng</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+X">Xiaoxue Xu</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Y">Yuan Li</a>, <a href="/search/eess?searchtype=author&query=WU%2C+J">Jie WU</a>, <a href="/search/eess?searchtype=author&query=Li%2C+T">Tao Li</a>, <a href="/search/eess?searchtype=author&query=Dong%2C+X">Xiang Dong</a>, <a href="/search/eess?searchtype=author&query=Cai%2C+Y">Yincai Cai</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+P">Peng Wu</a>, <a href="/search/eess?searchtype=author&query=Ullah%2C+S">Sana Ullah</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.08121v2-abstract-short" style="display: inline;"> In view of existing automatic driving is difficult to accurately and timely obtain the status and driving intention of other vehicles and the safety risk and urgency of autonomous vehicles in the absence of collision are evaluated. As a result, while vehicles generally maintain safe distances, accidents still frequently occur, particularly in merging areas. 
To ensure safety, improve road efficienc… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.08121v2-abstract-full').style.display = 'inline'; document.getElementById('2408.08121v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.08121v2-abstract-full" style="display: none;"> In view of existing automatic driving is difficult to accurately and timely obtain the status and driving intention of other vehicles and the safety risk and urgency of autonomous vehicles in the absence of collision are evaluated. As a result, while vehicles generally maintain safe distances, accidents still frequently occur, particularly in merging areas. To ensure safety, improve road efficiency, this paper presents a pre-programmed technique for managing vehicles' spatiotemporal trajectories to proactively mitigate conflicts among vehicles. Firstly, the study focuses on the calculation of safe distances under varying spatiotemporal conditions, taking into account differences in vehicle speed. Subsequently, an evaluation model for vehicle conflict risk is developed, which incorporates critical parameters such as collision acceleration and emergency acceleration. The methodology further identifies the main line vehicles that are potentially in conflict with on-ramp vehicles and determines the target gap for the latter. Based on this selected target gap, a cooperative control method is formulated, enabling the pre-programming of vehicle trajectories. Using highway ramp merging as a case study, the paper introduces a mainline priority spatiotemporal cooperative control method and validates its efficacy through rigorous simulations. The analysis indicates that the average delay time can be reduced by 97.96%, and fuel consumption by 6.01%. The mainline priority strategy demonstrates increased speed, low latency and low fuel consumption. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.08121v2-abstract-full').style.display = 'none'; document.getElementById('2408.08121v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.04158">arXiv:2408.04158</a> <span> [<a href="https://arxiv.org/pdf/2408.04158">pdf</a>, <a href="https://arxiv.org/format/2408.04158">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Efficient Single Image Super-Resolution with Entropy Attention and Receptive Field Augmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Zhao%2C+X">Xiaole Zhao</a>, <a href="/search/eess?searchtype=author&query=Li%2C+L">Linze Li</a>, <a href="/search/eess?searchtype=author&query=Xie%2C+C">Chengxing Xie</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+X">Xiaoming Zhang</a>, <a href="/search/eess?searchtype=author&query=Jiang%2C+T">Ting Jiang</a>, <a href="/search/eess?searchtype=author&query=Lin%2C+W">Wenjie Lin</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+S">Shuaicheng Liu</a>, <a href="/search/eess?searchtype=author&query=Li%2C+T">Tianrui Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.04158v1-abstract-short" style="display: inline;"> Transformer-based deep models for single image super-resolution (SISR) have greatly improved the performance of lightweight SISR tasks in recent years. However, they often suffer from heavy computational burden and slow inference due to the complex calculation of multi-head self-attention (MSA), seriously hindering their practical application and deployment. In this work, we present an efficient S… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.04158v1-abstract-full').style.display = 'inline'; document.getElementById('2408.04158v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.04158v1-abstract-full" style="display: none;"> Transformer-based deep models for single image super-resolution (SISR) have greatly improved the performance of lightweight SISR tasks in recent years. However, they often suffer from heavy computational burden and slow inference due to the complex calculation of multi-head self-attention (MSA), seriously hindering their practical application and deployment. In this work, we present an efficient SR model to mitigate the dilemma between model efficiency and SR performance, which is dubbed Entropy Attention and Receptive Field Augmentation network (EARFA), and composed of a novel entropy attention (EA) and a shifting large kernel attention (SLKA). From the perspective of information theory, EA increases the entropy of intermediate features conditioned on a Gaussian distribution, providing more informative input for subsequent reasoning. On the other hand, SLKA extends the receptive field of SR models with the assistance of channel shifting, which also favors to boost the diversity of hierarchical features. 
Since the implementation of EA and SLKA does not involve complex computations (such as extensive matrix multiplications), the proposed method can achieve faster nonlinear inference than Transformer-based SR models while maintaining better SR performance. Extensive experiments show that the proposed model can significantly reduce the delay of model inference while achieving the SR performance comparable with other advanced models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.04158v1-abstract-full').style.display = 'none'; document.getElementById('2408.04158v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ACM MM 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.03361">arXiv:2408.03361</a> <span> [<a href="https://arxiv.org/pdf/2408.03361">pdf</a>, <a href="https://arxiv.org/format/2408.03361">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> GMAI-MMBench: A Comprehensive Multimodal Evaluation Benchmark Towards General Medical AI </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+P">Pengcheng Chen</a>, <a href="/search/eess?searchtype=author&query=Ye%2C+J">Jin Ye</a>, <a 
href="/search/eess?searchtype=author&query=Wang%2C+G">Guoan Wang</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Y">Yanjun Li</a>, <a href="/search/eess?searchtype=author&query=Deng%2C+Z">Zhongying Deng</a>, <a href="/search/eess?searchtype=author&query=Li%2C+W">Wei Li</a>, <a href="/search/eess?searchtype=author&query=Li%2C+T">Tianbin Li</a>, <a href="/search/eess?searchtype=author&query=Duan%2C+H">Haodong Duan</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+Z">Ziyan Huang</a>, <a href="/search/eess?searchtype=author&query=Su%2C+Y">Yanzhou Su</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+B">Benyou Wang</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+S">Shaoting Zhang</a>, <a href="/search/eess?searchtype=author&query=Fu%2C+B">Bin Fu</a>, <a href="/search/eess?searchtype=author&query=Cai%2C+J">Jianfei Cai</a>, <a href="/search/eess?searchtype=author&query=Zhuang%2C+B">Bohan Zhuang</a>, <a href="/search/eess?searchtype=author&query=Seibel%2C+E+J">Eric J Seibel</a>, <a href="/search/eess?searchtype=author&query=He%2C+J">Junjun He</a>, <a href="/search/eess?searchtype=author&query=Qiao%2C+Y">Yu Qiao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.03361v7-abstract-short" style="display: inline;"> Large Vision-Language Models (LVLMs) are capable of handling diverse data types such as imaging, text, and physiological signals, and can be applied in various fields. In the medical field, LVLMs have a high potential to offer substantial assistance for diagnosis and treatment. Before that, it is crucial to develop benchmarks to evaluate LVLMs' effectiveness in various medical applications. 
Curren… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.03361v7-abstract-full').style.display = 'inline'; document.getElementById('2408.03361v7-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.03361v7-abstract-full" style="display: none;"> Large Vision-Language Models (LVLMs) are capable of handling diverse data types such as imaging, text, and physiological signals, and can be applied in various fields. In the medical field, LVLMs have a high potential to offer substantial assistance for diagnosis and treatment. Before that, it is crucial to develop benchmarks to evaluate LVLMs' effectiveness in various medical applications. Current benchmarks are often built upon specific academic literature, mainly focusing on a single domain, and lacking varying perceptual granularities. Thus, they face specific challenges, including limited clinical relevance, incomplete evaluations, and insufficient guidance for interactive LVLMs. To address these limitations, we developed the GMAI-MMBench, the most comprehensive general medical AI benchmark with well-categorized data structure and multi-perceptual granularity to date. It is constructed from 284 datasets across 38 medical image modalities, 18 clinical-related tasks, 18 departments, and 4 perceptual granularities in a Visual Question Answering (VQA) format. Additionally, we implemented a lexical tree structure that allows users to customize evaluation tasks, accommodating various assessment needs and substantially supporting medical AI research and applications. We evaluated 50 LVLMs, and the results show that even the advanced GPT-4o only achieves an accuracy of 53.96%, indicating significant room for improvement. Moreover, we identified five key insufficiencies in current cutting-edge LVLMs that need to be addressed to advance the development of better medical applications. 
We believe that GMAI-MMBench will stimulate the community to build the next generation of LVLMs toward GMAI. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.03361v7-abstract-full').style.display = 'none'; document.getElementById('2408.03361v7-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">GitHub: https://github.com/uni-medical/GMAI-MMBench Hugging face: https://huggingface.co/datasets/OpenGVLab/GMAI-MMBench</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.02208">arXiv:2408.02208</a> <span> [<a href="https://arxiv.org/pdf/2408.02208">pdf</a>, <a href="https://arxiv.org/format/2408.02208">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Physics and Society">physics.soc-ph</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.trc.2024.104804">10.1016/j.trc.2024.104804 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Multi-level 
Traffic-Responsive Tilt Camera Surveillance through Predictive Correlated Online Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Li%2C+T">Tao Li</a>, <a href="/search/eess?searchtype=author&query=Bian%2C+Z">Zilin Bian</a>, <a href="/search/eess?searchtype=author&query=Lei%2C+H">Haozhe Lei</a>, <a href="/search/eess?searchtype=author&query=Zuo%2C+F">Fan Zuo</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+Y">Ya-Ting Yang</a>, <a href="/search/eess?searchtype=author&query=Zhu%2C+Q">Quanyan Zhu</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Z">Zhenning Li</a>, <a href="/search/eess?searchtype=author&query=Ozbay%2C+K">Kaan Ozbay</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.02208v1-abstract-short" style="display: inline;"> In urban traffic management, the primary challenge of dynamically and efficiently monitoring traffic conditions is compounded by the insufficient utilization of thousands of surveillance cameras along the intelligent transportation system. This paper introduces the multi-level Traffic-responsive Tilt Camera surveillance system (TTC-X), a novel framework designed for dynamic and efficient monitorin… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.02208v1-abstract-full').style.display = 'inline'; document.getElementById('2408.02208v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.02208v1-abstract-full" style="display: none;"> In urban traffic management, the primary challenge of dynamically and efficiently monitoring traffic conditions is compounded by the insufficient utilization of thousands of surveillance cameras along the intelligent transportation system. 
This paper introduces the multi-level Traffic-responsive Tilt Camera surveillance system (TTC-X), a novel framework designed for dynamic and efficient monitoring and management of traffic in urban networks. By leveraging widely deployed pan-tilt-cameras (PTCs), TTC-X overcomes the limitations of a fixed field of view in traditional surveillance systems by providing mobilized and 360-degree coverage. The innovation of TTC-X lies in the integration of advanced machine learning modules, including a detector-predictor-controller structure, with a novel Predictive Correlated Online Learning (PiCOL) methodology and the Spatial-Temporal Graph Predictor (STGP) for real-time traffic estimation and PTC control. The TTC-X is tested and evaluated under three experimental scenarios (e.g., maximum traffic flow capture, dynamic route planning, traffic state estimation) based on a simulation environment calibrated using real-world traffic data in Brooklyn, New York. The experimental results showed that TTC-X captured over 60\% total number of vehicles at the network level, dynamically adjusted its route recommendation in reaction to unexpected full-lane closure events, and reconstructed link-level traffic states with best MAE less than 1.25 vehicle/hour. Demonstrating scalability, cost-efficiency, and adaptability, TTC-X emerges as a powerful solution for urban traffic management in both cyber-physical and real-world environments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.02208v1-abstract-full').style.display = 'none'; document.getElementById('2408.02208v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to Transportation Research Part C special issue: Modelling, Learning, and Control of Conventional, Cooperative and Automated Motorway and Urban Traffic Systems</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.20748">arXiv:2407.20748</a> <span> [<a href="https://arxiv.org/pdf/2407.20748">pdf</a>, <a href="https://arxiv.org/format/2407.20748">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Task-Oriented Communication for Vehicle-to-Infrastructure Cooperative Perception </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Shao%2C+J">Jiawei Shao</a>, <a href="/search/eess?searchtype=author&query=Li%2C+T">Teng Li</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+J">Jun Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.20748v1-abstract-short" style="display: inline;"> Vehicle-to-infrastructure (V2I) cooperative perception plays a crucial role in autonomous driving scenarios. Despite its potential to improve perception accuracy and robustness, the large amount of raw sensor data inevitably results in high communication overhead. 
To mitigate this issue, we propose TOCOM-V2I, a task-oriented communication framework for V2I cooperative perception, which reduces ban… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.20748v1-abstract-full').style.display = 'inline'; document.getElementById('2407.20748v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.20748v1-abstract-full" style="display: none;"> Vehicle-to-infrastructure (V2I) cooperative perception plays a crucial role in autonomous driving scenarios. Despite its potential to improve perception accuracy and robustness, the large amount of raw sensor data inevitably results in high communication overhead. To mitigate this issue, we propose TOCOM-V2I, a task-oriented communication framework for V2I cooperative perception, which reduces bandwidth consumption by transmitting only task-relevant information, instead of the raw data stream, for perceiving the surrounding environment. Our contributions are threefold. First, we propose a spatial-aware feature selection module to filter out irrelevant information based on spatial relationships and perceptual prior. Second, we introduce a hierarchical entropy model to exploit redundancy within the features for efficient compression and transmission. Finally, we utilize a scaled dot-product attention architecture to fuse vehicle-side and infrastructure-side features to improve perception performance. Experimental results demonstrate the effectiveness of TOCOM-V2I. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.20748v1-abstract-full').style.display = 'none'; document.getElementById('2407.20748v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.19867">arXiv:2407.19867</a> <span> [<a href="https://arxiv.org/pdf/2407.19867">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Design and Testing for Steel Support Axial Force Servo System </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Ullah%2C+S">Sana Ullah</a>, <a href="/search/eess?searchtype=author&query=Zhou%2C+Y">Yonghong Zhou</a>, <a href="/search/eess?searchtype=author&query=Lai%2C+M">Maokai Lai</a>, <a href="/search/eess?searchtype=author&query=Dong%2C+X">Xiang Dong</a>, <a href="/search/eess?searchtype=author&query=Li%2C+T">Tao Li</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+X">Xiaoxue Xu</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Y">Yuan Li</a>, <a href="/search/eess?searchtype=author&query=Peng%2C+T">Ting Peng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.19867v1-abstract-short" style="display: inline;"> Foundation excavations are deepening, expanding, and approaching structures. Steel supports measure and manage axial force. 
The study regulates steel support structure power during deep excavation using a novel axial force management system for safety, efficiency, and structural integrity. Closed-loop control changes actuator output to maintain axial force based on force. In deep excavation, the s… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.19867v1-abstract-full').style.display = 'inline'; document.getElementById('2407.19867v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.19867v1-abstract-full" style="display: none;"> Foundation excavations are deepening, expanding, and approaching structures. Steel supports measure and manage axial force. The study regulates steel support structure power during deep excavation using a novel axial force management system for safety, efficiency, and structural integrity. Closed-loop control changes actuator output to maintain axial force based on force. In deep excavation, the servo system regulates unstable soil, side pressure, and structural demands. Modern engineering and tech are used. Temperature changes automatically adjust the jack to maintain axial force. Includes hydraulic jacks, triple-acting cylinders, temperature, and deformation sensors, and automatic control. Foundation pit excavation is dynamic, yet structure tension is constant. There is no scientific way to regulate axial force foundation pit excavation. The revolutionary Servo system adjusts temperature, compression, and axial force to deform pits. System control requires foundation pit direction detection and modification. This engineering method has performed effectively for deep foundation pit excavation at railway crossings and other infrastructure projects. The surrounding protective structure may reduce the steel support's axial stress, making deep foundation excavation safe and efficient. 
Keywords: Servo systems, Steel strut support design, Deformation control, Monitoring and control, Deep excavation projects. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.19867v1-abstract-full').style.display = 'none'; document.getElementById('2407.19867v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 7 figures, 1 table, 2 graphs, conference paper</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.14340">arXiv:2407.14340</a> <span> [<a href="https://arxiv.org/pdf/2407.14340">pdf</a>, <a href="https://arxiv.org/format/2407.14340">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/CVPRW59228.2023.00135">10.1109/CVPRW59228.2023.00135 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Large Kernel Distillation Network for Efficient Single Image Super-Resolution </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xie%2C+C">Chengxing Xie</a>, <a 
href="/search/eess?searchtype=author&query=Zhang%2C+X">Xiaoming Zhang</a>, <a href="/search/eess?searchtype=author&query=Li%2C+L">Linze Li</a>, <a href="/search/eess?searchtype=author&query=Meng%2C+H">Haiteng Meng</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+T">Tianlin Zhang</a>, <a href="/search/eess?searchtype=author&query=Li%2C+T">Tianrui Li</a>, <a href="/search/eess?searchtype=author&query=Zhao%2C+X">Xiaole Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.14340v1-abstract-short" style="display: inline;"> Efficient and lightweight single-image super-resolution (SISR) has achieved remarkable performance in recent years. One effective approach is the use of large kernel designs, which have been shown to improve the performance of SISR models while reducing their computational requirements. However, current state-of-the-art (SOTA) models still face problems such as high computational costs. To address… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.14340v1-abstract-full').style.display = 'inline'; document.getElementById('2407.14340v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.14340v1-abstract-full" style="display: none;"> Efficient and lightweight single-image super-resolution (SISR) has achieved remarkable performance in recent years. One effective approach is the use of large kernel designs, which have been shown to improve the performance of SISR models while reducing their computational requirements. However, current state-of-the-art (SOTA) models still face problems such as high computational costs. To address these issues, we propose the Large Kernel Distillation Network (LKDN) in this paper. 
Our approach simplifies the model structure and introduces more efficient attention modules to reduce computational costs while also improving performance. Specifically, we employ the reparameterization technique to enhance model performance without adding extra cost. We also introduce a new optimizer from other tasks to SISR, which improves training speed and performance. Our experimental results demonstrate that LKDN outperforms existing lightweight SR methods and achieves SOTA performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.14340v1-abstract-full').style.display = 'none'; document.getElementById('2407.14340v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to CVPR workshop 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.10427">arXiv:2407.10427</a> <span> [<a href="https://arxiv.org/pdf/2407.10427">pdf</a>, <a href="https://arxiv.org/format/2407.10427">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Transformer for Multitemporal Hyperspectral Image Unmixing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Li%2C+H">Hang Li</a>, <a 
href="/search/eess?searchtype=author&query=Dong%2C+Q">Qiankun Dong</a>, <a href="/search/eess?searchtype=author&query=Xie%2C+X">Xueshuo Xie</a>, <a href="/search/eess?searchtype=author&query=Xu%2C+X">Xia Xu</a>, <a href="/search/eess?searchtype=author&query=Li%2C+T">Tao Li</a>, <a href="/search/eess?searchtype=author&query=Shi%2C+Z">Zhenwei Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.10427v1-abstract-short" style="display: inline;"> Multitemporal hyperspectral image unmixing (MTHU) holds significant importance in monitoring and analyzing the dynamic changes of surface. However, compared to single-temporal unmixing, the multitemporal approach demands comprehensive consideration of information across different phases, rendering it a greater challenge. To address this challenge, we propose the Multitemporal Hyperspectral Image U… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.10427v1-abstract-full').style.display = 'inline'; document.getElementById('2407.10427v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.10427v1-abstract-full" style="display: none;"> Multitemporal hyperspectral image unmixing (MTHU) holds significant importance in monitoring and analyzing the dynamic changes of surface. However, compared to single-temporal unmixing, the multitemporal approach demands comprehensive consideration of information across different phases, rendering it a greater challenge. To address this challenge, we propose the Multitemporal Hyperspectral Image Unmixing Transformer (MUFormer), an end-to-end unsupervised deep learning model. To effectively perform multitemporal hyperspectral image unmixing, we introduce two key modules: the Global Awareness Module (GAM) and the Change Enhancement Module (CEM). 
The Global Awareness Module computes self-attention across all phases, facilitating global weight allocation. On the other hand, the Change Enhancement Module dynamically learns local temporal changes by comparing endmember changes between adjacent phases. The synergy between these modules allows for capturing semantic information regarding endmember and abundance changes, thereby enhancing the effectiveness of multitemporal hyperspectral image unmixing. We conducted experiments on one real dataset and two synthetic datasets, demonstrating that our model significantly enhances the effect of multitemporal hyperspectral image unmixing. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.10427v1-abstract-full').style.display = 'none'; document.getElementById('2407.10427v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.06116">arXiv:2407.06116</a> <span> [<a href="https://arxiv.org/pdf/2407.06116">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Data-driven Nucleus Subclassification on Colon H&E using Style-transferred Digital Pathology </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Remedios%2C+L+W">Lucas W. 
Remedios</a>, <a href="/search/eess?searchtype=author&query=Bao%2C+S">Shunxing Bao</a>, <a href="/search/eess?searchtype=author&query=Remedios%2C+S+W">Samuel W. Remedios</a>, <a href="/search/eess?searchtype=author&query=Lee%2C+H+H">Ho Hin Lee</a>, <a href="/search/eess?searchtype=author&query=Cai%2C+L+Y">Leon Y. Cai</a>, <a href="/search/eess?searchtype=author&query=Li%2C+T">Thomas Li</a>, <a href="/search/eess?searchtype=author&query=Deng%2C+R">Ruining Deng</a>, <a href="/search/eess?searchtype=author&query=Newlin%2C+N+R">Nancy R. Newlin</a>, <a href="/search/eess?searchtype=author&query=Saunders%2C+A+M">Adam M. Saunders</a>, <a href="/search/eess?searchtype=author&query=Cui%2C+C">Can Cui</a>, <a href="/search/eess?searchtype=author&query=Li%2C+J">Jia Li</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+Q">Qi Liu</a>, <a href="/search/eess?searchtype=author&query=Lau%2C+K+S">Ken S. Lau</a>, <a href="/search/eess?searchtype=author&query=Roland%2C+J+T">Joseph T. Roland</a>, <a href="/search/eess?searchtype=author&query=Washington%2C+M+K">Mary K Washington</a>, <a href="/search/eess?searchtype=author&query=Coburn%2C+L+A">Lori A. Coburn</a>, <a href="/search/eess?searchtype=author&query=Wilson%2C+K+T">Keith T. Wilson</a>, <a href="/search/eess?searchtype=author&query=Huo%2C+Y">Yuankai Huo</a>, <a href="/search/eess?searchtype=author&query=Landman%2C+B+A">Bennett A. Landman</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.06116v1-abstract-short" style="display: inline;"> Understanding the way cells communicate, co-locate, and interrelate is essential to furthering our understanding of how the body functions. H&E is widely available, however, cell subtyping often requires expert knowledge and the use of specialized stains. To reduce the annotation burden, AI has been proposed for the classification of cells on H&E. 
For example, the recent Colon Nucleus Identificati… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.06116v1-abstract-full').style.display = 'inline'; document.getElementById('2407.06116v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.06116v1-abstract-full" style="display: none;"> Understanding the way cells communicate, co-locate, and interrelate is essential to furthering our understanding of how the body functions. H&E is widely available, however, cell subtyping often requires expert knowledge and the use of specialized stains. To reduce the annotation burden, AI has been proposed for the classification of cells on H&E. For example, the recent Colon Nucleus Identification and Classification (CoNIC) Challenge focused on labeling 6 cell types on H&E of the colon. However, the CoNIC Challenge was unable to classify epithelial subtypes (progenitor, enteroendocrine, goblet), lymphocyte subtypes (B, helper T, cytotoxic T), and connective subtypes (fibroblasts). We use inter-modality learning to label previously un-labelable cell types on H&E. We take advantage of multiplexed immunofluorescence (MxIF) histology to label 14 cell subclasses. We performed style transfer on the same MxIF tissues to synthesize realistic virtual H&E which we paired with the MxIF-derived cell subclassification labels. We evaluated the efficacy of using a supervised learning scheme where the input was realistic-quality virtual H&E and the labels were MxIF-derived cell subclasses. We assessed our model on private virtual H&E and public real H&E. On virtual H&E, we were able to classify helper T cells and epithelial progenitors with positive predictive values of $0.34 \pm 0.15$ (prevalence $0.03 \pm 0.01$) and $0.47 \pm 0.1$ (prevalence $0.07 \pm 0.02$) respectively, when using ground truth centroid information. 
On real H&E we could classify helper T cells and epithelial progenitors with upper bound positive predictive values of $0.43 \pm 0.03$ (parent class prevalence 0.21) and $0.94 \pm 0.02$ (parent class prevalence 0.49) when using ground truth centroid information. This is the first work to provide cell type classification for helper T and epithelial progenitor nuclei on H&E. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.06116v1-abstract-full').style.display = 'none'; document.getElementById('2407.06116v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: text overlap with arXiv:2401.05602</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.04675">arXiv:2407.04675</a> <span> [<a href="https://arxiv.org/pdf/2407.04675">pdf</a>, <a href="https://arxiv.org/format/2407.04675">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Seed-ASR: Understanding Diverse Speech and Contexts with LLM-based Speech Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Bai%2C+Y">Ye Bai</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+J">Jingping Chen</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+J">Jitong Chen</a>, <a 
href="/search/eess?searchtype=author&query=Chen%2C+W">Wei Chen</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Z">Zhuo Chen</a>, <a href="/search/eess?searchtype=author&query=Ding%2C+C">Chuang Ding</a>, <a href="/search/eess?searchtype=author&query=Dong%2C+L">Linhao Dong</a>, <a href="/search/eess?searchtype=author&query=Dong%2C+Q">Qianqian Dong</a>, <a href="/search/eess?searchtype=author&query=Du%2C+Y">Yujiao Du</a>, <a href="/search/eess?searchtype=author&query=Gao%2C+K">Kepan Gao</a>, <a href="/search/eess?searchtype=author&query=Gao%2C+L">Lu Gao</a>, <a href="/search/eess?searchtype=author&query=Guo%2C+Y">Yi Guo</a>, <a href="/search/eess?searchtype=author&query=Han%2C+M">Minglun Han</a>, <a href="/search/eess?searchtype=author&query=Han%2C+T">Ting Han</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+W">Wenchao Hu</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+X">Xinying Hu</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+Y">Yuxiang Hu</a>, <a href="/search/eess?searchtype=author&query=Hua%2C+D">Deyu Hua</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+L">Lu Huang</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+M">Mingkun Huang</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+Y">Youjia Huang</a>, <a href="/search/eess?searchtype=author&query=Jin%2C+J">Jishuo Jin</a>, <a href="/search/eess?searchtype=author&query=Kong%2C+F">Fanliu Kong</a>, <a href="/search/eess?searchtype=author&query=Lan%2C+Z">Zongwei Lan</a>, <a href="/search/eess?searchtype=author&query=Li%2C+T">Tianyu Li</a> , et al. 
(30 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.04675v2-abstract-short" style="display: inline;"> Modern automatic speech recognition (ASR) model is required to accurately transcribe diverse speech signals (from different domains, languages, accents, etc) given the specific contextual information in various application scenarios. Classic end-to-end models fused with extra language models perform well, but mainly in data matching scenarios and are gradually approaching a bottleneck. In this wor… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.04675v2-abstract-full').style.display = 'inline'; document.getElementById('2407.04675v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.04675v2-abstract-full" style="display: none;"> Modern automatic speech recognition (ASR) model is required to accurately transcribe diverse speech signals (from different domains, languages, accents, etc) given the specific contextual information in various application scenarios. Classic end-to-end models fused with extra language models perform well, but mainly in data matching scenarios and are gradually approaching a bottleneck. In this work, we introduce Seed-ASR, a large language model (LLM) based speech recognition model. Seed-ASR is developed based on the framework of audio conditioned LLM (AcLLM), leveraging the capabilities of LLMs by inputting continuous speech representations together with contextual information into the LLM. Through stage-wise large-scale training and the elicitation of context-aware capabilities in LLM, Seed-ASR demonstrates significant improvement over end-to-end models on comprehensive evaluation sets, including multiple domains, accents/dialects and languages. 
Additionally, Seed-ASR can be further deployed to support specific needs in various scenarios without requiring extra language models. Compared to recently released large ASR models, Seed-ASR achieves 10%-40% reduction in word (or character, for Chinese) error rates on Chinese and English public test sets, further demonstrating its powerful performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.04675v2-abstract-full').style.display = 'none'; document.getElementById('2407.04675v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.03671">arXiv:2407.03671</a> <span> [<a href="https://arxiv.org/pdf/2407.03671">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Spatio-temporal cooperative control Method of Highway Ramp Merge Based on Vehicle-road Coordination </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Xu%2C+X">Xiaoxue Xu</a>, <a href="/search/eess?searchtype=author&query=Lai%2C+M">Maokai Lai</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+H">Haitao Zhang</a>, <a href="/search/eess?searchtype=author&query=Dong%2C+X">Xiang Dong</a>, <a href="/search/eess?searchtype=author&query=Li%2C+T">Tao Li</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+J">Jie Wu</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Y">Yuan Li</a>, <a 
href="/search/eess?searchtype=author&query=Peng%2C+T">Ting Peng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.03671v2-abstract-short" style="display: inline;"> The merging area of highway ramps faces multiple challenges, including traffic congestion, collision risks, speed mismatches, driver behavior uncertainties, limited visibility, and bottleneck effects. However, autonomous vehicles engaging in depth coordination between vehicle and road in merging zones, by pre-planning and uploading travel trajectories, can significantly enhance the safety and effi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.03671v2-abstract-full').style.display = 'inline'; document.getElementById('2407.03671v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.03671v2-abstract-full" style="display: none;"> The merging area of highway ramps faces multiple challenges, including traffic congestion, collision risks, speed mismatches, driver behavior uncertainties, limited visibility, and bottleneck effects. However, autonomous vehicles engaging in depth coordination between vehicle and road in merging zones, by pre-planning and uploading travel trajectories, can significantly enhance the safety and efficiency of merging zones. In this paper, we mainly introduce mainline priority cooperation method to achieve the time and space cooperative control of highway merge. Vehicle-mounted intelligent units share real-time vehicle status and driving intentions with Road Section Management Units, which pre-plan the spatiotemporal trajectories of vehicle travel. After receiving these trajectories, Vehicle Intelligent Units strictly adhere to them. 
Through this deep collaboration between vehicles and roads, conflicts in time and space during vehicle travel are eliminated in advance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.03671v2-abstract-full').style.display = 'none'; document.getElementById('2407.03671v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.02830">arXiv:2407.02830</a> <span> [<a href="https://arxiv.org/pdf/2407.02830">pdf</a>, <a href="https://arxiv.org/format/2407.02830">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> A Radiometric Correction based Optical Modeling Approach to Removing Reflection Noise in TLS Point Clouds of Urban Scenes </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Fang%2C+L">Li Fang</a>, <a href="/search/eess?searchtype=author&query=Li%2C+T">Tianyu Li</a>, <a href="/search/eess?searchtype=author&query=Lin%2C+Y">Yanghong Lin</a>, <a href="/search/eess?searchtype=author&query=Zhou%2C+S">Shudong Zhou</a>, <a href="/search/eess?searchtype=author&query=Yao%2C+W">Wei Yao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short 
has-text-grey-dark mathjax" id="2407.02830v1-abstract-short" style="display: inline;"> Point clouds are vital in computer vision tasks such as 3D reconstruction, autonomous driving, and robotics. However, TLS-acquired point clouds often contain virtual points from reflective surfaces, causing disruptions. This study presents a reflection noise elimination algorithm for TLS point clouds. Our innovative reflection plane detection algorithm, based on geometry-optical models and physica… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.02830v1-abstract-full').style.display = 'inline'; document.getElementById('2407.02830v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.02830v1-abstract-full" style="display: none;"> Point clouds are vital in computer vision tasks such as 3D reconstruction, autonomous driving, and robotics. However, TLS-acquired point clouds often contain virtual points from reflective surfaces, causing disruptions. This study presents a reflection noise elimination algorithm for TLS point clouds. Our innovative reflection plane detection algorithm, based on geometry-optical models and physical properties, identifies and categorizes reflection points per optical reflection theory. We've adapted the LSFH feature descriptor to retain reflection features, mitigating interference from symmetrical architectural structures. By incorporating the Hausdorff feature distance, the algorithm enhances resilience to ghosting and deformation, improving virtual point detection accuracy. Extensive experiments on the 3DRN benchmark dataset, featuring diverse urban environments with virtual TLS reflection noise, show our algorithm improves precision and recall rates for 3D points in reflective regions by 57.03\% and 31.80\%, respectively. Our method achieves a 9.17\% better outlier detection rate and 5.65\% higher accuracy than leading methods. 
Access the 3DRN dataset at (https://github.com/Tsuiky/3DRN). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.02830v1-abstract-full').style.display = 'none'; document.getElementById('2407.02830v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.02765">arXiv:2407.02765</a> <span> [<a href="https://arxiv.org/pdf/2407.02765">pdf</a>, <a href="https://arxiv.org/ps/2407.02765">ps</a>, <a href="https://arxiv.org/format/2407.02765">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Probability">math.PR</span> </div> </div> <p class="title is-5 mathjax"> Graphon Particle Systems, Part II: Dynamics of Distributed Stochastic Continuum Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yan Chen</a>, <a href="/search/eess?searchtype=author&query=Li%2C+T">Tao Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.02765v1-abstract-short" style="display: inline;"> We study the distributed optimization problem over a graphon with a continuum of nodes, which is regarded as 
the limit of the distributed networked optimization as the number of nodes goes to infinity. Each node has a private local cost function. The global cost function, which all nodes cooperatively minimize, is the integral of the local cost functions on the node set. We propose stochastic grad… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.02765v1-abstract-full').style.display = 'inline'; document.getElementById('2407.02765v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.02765v1-abstract-full" style="display: none;"> We study the distributed optimization problem over a graphon with a continuum of nodes, which is regarded as the limit of the distributed networked optimization as the number of nodes goes to infinity. Each node has a private local cost function. The global cost function, which all nodes cooperatively minimize, is the integral of the local cost functions on the node set. We propose stochastic gradient descent and gradient tracking algorithms over the graphon. We establish a general lemma for the upper bound estimation related to a class of time-varying differential inequalities with negative linear terms, based upon which, we prove that for both kinds of algorithms, the second moments of the nodes' states are uniformly bounded. Especially, for the stochastic gradient tracking algorithm, we transform the convergence analysis into the asymptotic property of coupled nonlinear differential inequalities with time-varying coefficients and develop a decoupling method. For both kinds of algorithms, we show that by choosing the time-varying algorithm gains properly, all nodes' states achieve $\mathcal{L}^{\infty}$-consensus for a connected graphon. 
Furthermore, if the local cost functions are strongly convex, then all nodes' states converge to the minimizer of the global cost function and the auxiliary states in the stochastic gradient tracking algorithm converge to the gradient value of the global cost function at the minimizer uniformly in mean square. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.02765v1-abstract-full').style.display = 'none'; document.getElementById('2407.02765v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.07952">arXiv:2406.07952</a> <span> [<a href="https://arxiv.org/pdf/2406.07952">pdf</a>, <a href="https://arxiv.org/format/2406.07952">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Spatial-Frequency Dual Progressive Attention Network For Medical Image Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Zhou%2C+Z">Zhenhuan Zhou</a>, <a href="/search/eess?searchtype=author&query=He%2C+A">Along He</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Y">Yanlin Wu</a>, <a href="/search/eess?searchtype=author&query=Yao%2C+R">Rui Yao</a>, <a href="/search/eess?searchtype=author&query=Xie%2C+X">Xueshuo Xie</a>, <a href="/search/eess?searchtype=author&query=Li%2C+T">Tao Li</a> </p> <p class="abstract mathjax"> <span 
class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.07952v2-abstract-short" style="display: inline;"> In medical images, various types of lesions often manifest significant differences in their shape and texture. Accurate medical image segmentation demands deep learning models with robust capabilities in multi-scale and boundary feature learning. However, previous networks still have limitations in addressing the above issues. Firstly, previous networks simultaneously fuse multi-level features or… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.07952v2-abstract-full').style.display = 'inline'; document.getElementById('2406.07952v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.07952v2-abstract-full" style="display: none;"> In medical images, various types of lesions often manifest significant differences in their shape and texture. Accurate medical image segmentation demands deep learning models with robust capabilities in multi-scale and boundary feature learning. However, previous networks still have limitations in addressing the above issues. Firstly, previous networks simultaneously fuse multi-level features or employ deep supervision to enhance multi-scale learning. However, this may lead to feature redundancy and excessive computational overhead, which is not conducive to network training and clinical deployment. Secondly, the majority of medical image segmentation networks exclusively learn features in the spatial domain, disregarding the abundant global information in the frequency domain. This results in a bias towards low-frequency components, neglecting crucial high-frequency information. To address these problems, we introduce SF-UNet, a spatial-frequency dual-domain attention network. 
It comprises two main components: the Multi-scale Progressive Channel Attention (MPCA) block, which progressively extracts multi-scale features across adjacent encoder layers, and the lightweight Frequency-Spatial Attention (FSA) block, with only 0.05M parameters, enabling concurrent learning of texture and boundary features from both spatial and frequency domains. We validate the effectiveness of the proposed SF-UNet on three public datasets. Experimental results show that compared to previous state-of-the-art (SOTA) medical image segmentation networks, SF-UNet achieves the best performance, and achieves up to 9.4\% and 10.78\% improvement in DSC and IOU. Codes will be released at https://github.com/nkicsl/SF-UNet. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.07952v2-abstract-full').style.display = 'none'; document.getElementById('2406.07952v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages accepted by 2024 IEEE International Conference on Bioinformatics and Biomedicine (BIBM 2024)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.05681">arXiv:2406.05681</a> <span> [<a href="https://arxiv.org/pdf/2406.05681">pdf</a>, <a href="https://arxiv.org/format/2406.05681">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Towards Expressive Zero-Shot Speech Synthesis with Hierarchical Prosody Modeling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Jiang%2C+Y">Yuepeng Jiang</a>, <a href="/search/eess?searchtype=author&query=Li%2C+T">Tao Li</a>, <a href="/search/eess?searchtype=author&query=Yang%2C+F">Fengyu Yang</a>, <a href="/search/eess?searchtype=author&query=Xie%2C+L">Lei Xie</a>, <a href="/search/eess?searchtype=author&query=Meng%2C+M">Meng Meng</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Y">Yujun Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.05681v2-abstract-short" style="display: inline;"> Recent research in zero-shot speech synthesis has made significant progress in speaker similarity. However, current efforts focus on timbre generalization rather than prosody modeling, which results in limited naturalness and expressiveness. 
To address this, we introduce a novel speech synthesis model trained on large-scale datasets, including both timbre and hierarchical prosody modeling. As timb… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.05681v2-abstract-full').style.display = 'inline'; document.getElementById('2406.05681v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.05681v2-abstract-full" style="display: none;"> Recent research in zero-shot speech synthesis has made significant progress in speaker similarity. However, current efforts focus on timbre generalization rather than prosody modeling, which results in limited naturalness and expressiveness. To address this, we introduce a novel speech synthesis model trained on large-scale datasets, including both timbre and hierarchical prosody modeling. As timbre is a global attribute closely linked to expressiveness, we adopt a global vector to model speaker timbre while guiding prosody modeling. Besides, given that prosody contains both global consistency and local variations, we introduce a diffusion model as the pitch predictor and employ a prosody adaptor to model prosody hierarchically, further enhancing the prosody quality of the synthesized speech. Experimental results show that our model not only maintains comparable timbre quality to the baseline but also exhibits better naturalness and expressiveness. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.05681v2-abstract-full').style.display = 'none'; document.getElementById('2406.05681v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 2 figures, accepted by Interspeech2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.19363">arXiv:2405.19363</a> <span> [<a href="https://arxiv.org/pdf/2405.19363">pdf</a>, <a href="https://arxiv.org/format/2405.19363">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Medformer: A Multi-Granularity Patching Transformer for Medical Time-Series Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wang%2C+Y">Yihe Wang</a>, <a href="/search/eess?searchtype=author&query=Huang%2C+N">Nan Huang</a>, <a href="/search/eess?searchtype=author&query=Li%2C+T">Taida Li</a>, <a href="/search/eess?searchtype=author&query=Yan%2C+Y">Yujun Yan</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+X">Xiang Zhang</a> </p> <p class="abstract mathjax"> 
<span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.19363v2-abstract-short" style="display: inline;"> Medical time series (MedTS) data, such as Electroencephalography (EEG) and Electrocardiography (ECG), play a crucial role in healthcare, such as diagnosing brain and heart diseases. Existing methods for MedTS classification primarily rely on handcrafted biomarkers extraction and CNN-based models, with limited exploration of transformer-based models. In this paper, we introduce Medformer, a multi-g… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.19363v2-abstract-full').style.display = 'inline'; document.getElementById('2405.19363v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.19363v2-abstract-full" style="display: none;"> Medical time series (MedTS) data, such as Electroencephalography (EEG) and Electrocardiography (ECG), play a crucial role in healthcare, such as diagnosing brain and heart diseases. Existing methods for MedTS classification primarily rely on handcrafted biomarkers extraction and CNN-based models, with limited exploration of transformer-based models. In this paper, we introduce Medformer, a multi-granularity patching transformer tailored specifically for MedTS classification. Our method incorporates three novel mechanisms to leverage the unique characteristics of MedTS: cross-channel patching to leverage inter-channel correlations, multi-granularity embedding for capturing features at different scales, and two-stage (intra- and inter-granularity) multi-granularity self-attention for learning features and correlations within and among granularities. We conduct extensive experiments on five public datasets under both subject-dependent and challenging subject-independent setups. 
Results demonstrate Medformer's superiority over 10 baselines, achieving top averaged ranking across five datasets on all six evaluation metrics. These findings underscore the significant impact of our method on healthcare applications, such as diagnosing Myocardial Infarction, Alzheimer's, and Parkinson's disease. We release the source code at https://github.com/DL4mHealth/Medformer. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.19363v2-abstract-full').style.display = 'none'; document.getElementById('2405.19363v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">21 pages (15 pages main paper + 6 pages supplementary materials)</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Neurips 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.16760">arXiv:2405.16760</a> <span> [<a href="https://arxiv.org/pdf/2405.16760">pdf</a>, <a href="https://arxiv.org/ps/2405.16760">ps</a>, <a href="https://arxiv.org/format/2405.16760">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Probability">math.PR</span> </div> </div> <p class="title is-5 mathjax"> Graphon Particle Systems, Part I: Spatio-Temporal Approximation and Law of Large 
Numbers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+Y">Yan Chen</a>, <a href="/search/eess?searchtype=author&query=Li%2C+T">Tao Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.16760v2-abstract-short" style="display: inline;"> We study a class of graphon particle systems with time-varying random coefficients. In a graphon particle system, the interactions among particles are characterized by the coupled mean field terms through an underlying graphon and the randomness of the coefficients comes from the stochastic processes associated with the particle labels. By constructing two-level approximated sequences converging i… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.16760v2-abstract-full').style.display = 'inline'; document.getElementById('2405.16760v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.16760v2-abstract-full" style="display: none;"> We study a class of graphon particle systems with time-varying random coefficients. In a graphon particle system, the interactions among particles are characterized by the coupled mean field terms through an underlying graphon and the randomness of the coefficients comes from the stochastic processes associated with the particle labels. By constructing two-level approximated sequences converging in 2-Wasserstein distance, we prove the existence and uniqueness of the solution to the system. 
Besides, by constructing two-level approximated functions converging to the graphon mean field terms, we establish the law of large numbers, which reveals that if the number of particles tends to infinity and the discretization step tends to zero, then the discrete-time interacting particle system over a large-scale network converges to the graphon particle system. As a byproduct, we discover that the graphon particle system can describe the limiting dynamics of the distributed stochastic gradient descent algorithm over the large-scale network and prove that if the gradients of the local cost functions are Lipschitz continuous, then the graphon particle system can be regarded as the spatio-temporal approximation of the discrete-time distributed stochastic gradient descent algorithm as the number of network nodes tends to infinity and the algorithm step size tends to zero. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.16760v2-abstract-full').style.display = 'none'; document.getElementById('2405.16760v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.11935">arXiv:2405.11935</a> <span> [<a href="https://arxiv.org/pdf/2405.11935">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applied Physics">physics.app-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optics">physics.optics</span> </div> </div> <p class="title is-5 mathjax"> A Flat Dual-Polarized Millimeter-Wave Luneburg Lens Antenna Using Transformation Optics with Reduced Anisotropy and Impedance Mismatch </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Su%2C+Y">Yuanyan Su</a>, <a href="/search/eess?searchtype=author&query=Li%2C+T">Teng Li</a>, <a href="/search/eess?searchtype=author&query=Hong%2C+W">Wei Hong</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+Z+N">Zhi Ning Chen</a>, <a href="/search/eess?searchtype=author&query=Skrivervik%2C+A+K">Anja K. Skrivervik</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.11935v1-abstract-short" style="display: inline;"> In this paper, a compact wideband dual-polarized Luneburg lens antenna (LLA) with reduced anisotropy and improved impedance matching is proposed in Ka band with a wide 2D beamscanning capability. Based on transformation optics, the spherical Luneburg lens is compressed into a cylindrical one, while the merits of high gain, broad band, wide scanning, and free polarization are preserved. 
A trigonome… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.11935v1-abstract-full').style.display = 'inline'; document.getElementById('2405.11935v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.11935v1-abstract-full" style="display: none;"> In this paper, a compact wideband dual-polarized Luneburg lens antenna (LLA) with reduced anisotropy and improved impedance matching is proposed in Ka band with a wide 2D beamscanning capability. Based on transformation optics, the spherical Luneburg lens is compressed into a cylindrical one, while the merits of high gain, broad band, wide scanning, and free polarization are preserved. A trigonometric function is employed to the material property of the flattened Luneburg lens with reduced anisotropy, thus effectively alleviating the strong reflection, the high sidelobes and back radiation with a free cost on the antenna weight and volume. Furthermore, a light thin wideband 7-by-1 metasurface phased array is studied as the primary feed for the LLA. The proposed metantenna, short for metamaterial-based antenna, has a high potential for B5G, future wireless communication and radar sensing as an onboard system. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.11935v1-abstract-full').style.display = 'none'; document.getElementById('2405.11935v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024.
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.11883">arXiv:2405.11883</a> <span> [<a href="https://arxiv.org/pdf/2405.11883">pdf</a>, <a href="https://arxiv.org/format/2405.11883">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Asynchronous MIMO-OFDM Massive Unsourced Random Access with Codeword Collisions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Li%2C+T">Tianya Li</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Y">Yongpeng Wu</a>, <a href="/search/eess?searchtype=author&query=Gao%2C+J">Junyuan Gao</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+W">Wenjun Zhang</a>, <a href="/search/eess?searchtype=author&query=Xia%2C+X">Xiang-Gen Xia</a>, <a href="/search/eess?searchtype=author&query=Ng%2C+D+W+K">Derrick Wing Kwan Ng</a>, <a href="/search/eess?searchtype=author&query=Xiao%2C+C">Chengshan Xiao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.11883v2-abstract-short" style="display: inline;"> This paper investigates asynchronous multiple-input multiple-output (MIMO) massive unsourced random access (URA) in an orthogonal frequency division multiplexing (OFDM) system over frequency-selective fading channels, with the presence of both timing and carrier frequency offsets (TO and CFO) and non-negligible codeword collisions. 
The proposed coding framework segregates the data into two compone… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.11883v2-abstract-full').style.display = 'inline'; document.getElementById('2405.11883v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.11883v2-abstract-full" style="display: none;"> This paper investigates asynchronous multiple-input multiple-output (MIMO) massive unsourced random access (URA) in an orthogonal frequency division multiplexing (OFDM) system over frequency-selective fading channels, with the presence of both timing and carrier frequency offsets (TO and CFO) and non-negligible codeword collisions. The proposed coding framework segregates the data into two components, namely, preamble and coding parts, with the former being tree-coded and the latter LDPC-coded. By leveraging the dual sparsity of the equivalent channel across both codeword and delay domains (CD and DD), we develop a message-passing-based sparse Bayesian learning algorithm, combined with belief propagation and mean field, to iteratively estimate DD channel responses, TO, and delay profiles. Furthermore, by jointly leveraging the observations among multiple slots, we establish a novel graph-based algorithm to iteratively separate the superimposed channels and compensate for the phase rotations. Additionally, the proposed algorithm is applied to the flat fading scenario to estimate both TO and CFO, where the channel and offset estimation is enhanced by leveraging the geometric characteristics of the signal constellation. Extensive simulations reveal that the proposed algorithm achieves superior performance and substantial complexity reduction in both channel and offset estimation compared to the codebook enlarging-based counterparts, and enhanced data recovery performances compared to state-of-the-art URA schemes. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.11883v2-abstract-full').style.display = 'none'; document.getElementById('2405.11883v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper has been accepted by the IEEE Transactions on Wireless Communications</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.03211">arXiv:2404.03211</a> <span> [<a href="https://arxiv.org/pdf/2404.03211">pdf</a>, <a href="https://arxiv.org/format/2404.03211">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Convergence Conditions of Online Regularized Statistical Learning in Reproducing Kernel Hilbert Space With Non-Stationary Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Zhang%2C+X">Xiwei Zhang</a>, <a href="/search/eess?searchtype=author&query=Li%2C+T">Tao Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.03211v4-abstract-short" style="display: inline;"> We study the convergence of recursive regularized learning 
algorithms in the reproducing kernel Hilbert space (RKHS) with dependent and non-stationary online data streams. Firstly, we study the mean square asymptotic stability of a class of random difference equations in RKHS, whose non-homogeneous terms are martingale difference sequences dependent on the homogeneous ones. Secondly, we introduce… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.03211v4-abstract-full').style.display = 'inline'; document.getElementById('2404.03211v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.03211v4-abstract-full" style="display: none;"> We study the convergence of recursive regularized learning algorithms in the reproducing kernel Hilbert space (RKHS) with dependent and non-stationary online data streams. Firstly, we study the mean square asymptotic stability of a class of random difference equations in RKHS, whose non-homogeneous terms are martingale difference sequences dependent on the homogeneous ones. Secondly, we introduce the concept of random Tikhonov regularization path, and show that if the regularization path is slowly time-varying in some sense, then the output of the algorithm is consistent with the regularization path in mean square. Furthermore, if the data streams also satisfy the RKHS persistence of excitation condition, i.e. there exists a fixed length of time period, such that the conditional expectation of the operators induced by the input data accumulated over every time period has a uniformly strictly positive compact lower bound in the sense of the operator order with respect to time, then the output of the algorithm is consistent with the unknown function in mean square. 
Finally, for the case with independent and non-identically distributed data streams, the algorithm achieves the mean square consistency provided the marginal probability measures induced by the input data are slowly time-varying and the average measure over each fixed-length time period has a uniformly strictly positive lower bound. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.03211v4-abstract-full').style.display = 'none'; document.getElementById('2404.03211v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.15636">arXiv:2403.15636</a> <span> [<a href="https://arxiv.org/pdf/2403.15636">pdf</a>, <a href="https://arxiv.org/ps/2403.15636">ps</a>, <a href="https://arxiv.org/format/2403.15636">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> On the Variational Interpretation of Mirror Play in Monotone Games </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Pan%2C+Y">Yunian Pan</a>, <a href="/search/eess?searchtype=author&query=Li%2C+T">Tao Li</a>, <a href="/search/eess?searchtype=author&query=Zhu%2C+Q">Quanyan Zhu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span 
class="abstract-short has-text-grey-dark mathjax" id="2403.15636v1-abstract-short" style="display: inline;"> Mirror play (MP) is a well-accepted primal-dual multi-agent learning algorithm where all agents simultaneously implement mirror descent in a distributed fashion. The advantage of MP over vanilla gradient play lies in its usage of mirror maps that better exploit the geometry of decision domains. Despite extensive literature dedicated to the asymptotic convergence of MP to equilibrium, the understan… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.15636v1-abstract-full').style.display = 'inline'; document.getElementById('2403.15636v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.15636v1-abstract-full" style="display: none;"> Mirror play (MP) is a well-accepted primal-dual multi-agent learning algorithm where all agents simultaneously implement mirror descent in a distributed fashion. The advantage of MP over vanilla gradient play lies in its usage of mirror maps that better exploit the geometry of decision domains. Despite extensive literature dedicated to the asymptotic convergence of MP to equilibrium, the understanding of the finite-time behavior of MP before reaching equilibrium is still rudimentary. To facilitate the study of MP's non-equilibrium performance, this work establishes an equivalence between MP's finite-time primal-dual path (mirror path) in monotone games and the closed-loop Nash equilibrium path of a finite-horizon differential game, referred to as mirror differential game (MDG). Our construction of MDG rests on the Brezis-Ekeland variational principle, and the stage cost functional for MDG is Fenchel coupling between MP's iterates and associated gradient updates. The variational interpretation of mirror path in static games as the equilibrium path in MDG holds in deterministic and stochastic cases. 
Such a variational interpretation translates the non-equilibrium studies of learning dynamics into a more tractable equilibrium analysis of dynamic games, as demonstrated in a case study on the Cournot game, where MP dynamics corresponds to a linear quadratic game. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.15636v1-abstract-full').style.display = 'none'; document.getElementById('2403.15636v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.12467">arXiv:2403.12467</a> <span> [<a href="https://arxiv.org/pdf/2403.12467">pdf</a>, <a href="https://arxiv.org/format/2403.12467">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Digital Twin Channel for 6G: Concepts, Architectures and Potential Applications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wang%2C+H">Heng Wang</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+J">Jianhua Zhang</a>, <a href="/search/eess?searchtype=author&query=Nie%2C+G">Gaofeng Nie</a>, <a href="/search/eess?searchtype=author&query=Yu%2C+L">Li Yu</a>, <a href="/search/eess?searchtype=author&query=Yuan%2C+Z">Zhiqiang Yuan</a>, <a href="/search/eess?searchtype=author&query=Li%2C+T">Tongjie Li</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+J">Jialin Wang</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+G">Guangyi Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.12467v4-abstract-short" style="display: inline;"> Digital twin channel (DTC) is the real-time mapping of a wireless channel from the physical world to the digital world, which is expected to provide significant performance enhancements for the sixth-generation (6G) air-interface design. In this work, we first define five evolution levels of channel twins with the progression of wireless communication. The fifth level, autonomous DTC, is elaborate… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.12467v4-abstract-full').style.display = 'inline'; document.getElementById('2403.12467v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.12467v4-abstract-full" style="display: none;"> Digital twin channel (DTC) is the real-time mapping of a wireless channel from the physical world to the digital world, which is expected to provide significant performance enhancements for the sixth-generation (6G) air-interface design. In this work, we first define five evolution levels of channel twins with the progression of wireless communication. The fifth level, autonomous DTC, is elaborated with multi-dimensional factors such as methodology, characterization precision, and data category. Then, we provide detailed insights into the requirements and architecture of a complete DTC for 6G. Subsequently, a sensing-enhanced real-time channel prediction platform and experimental validations are exhibited. Finally, drawing from the vision of the 6G network, we explore the potential applications and the open issues in future DTC research. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.12467v4-abstract-full').style.display = 'none'; document.getElementById('2403.12467v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages, 5 figures, 15 references. It is submitted to IEEE journal</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.00274">arXiv:2403.00274</a> <span> [<a href="https://arxiv.org/pdf/2403.00274">pdf</a>, <a href="https://arxiv.org/format/2403.00274">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> CustomListener: Text-guided Responsive Interaction for User-friendly Listening Head Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Liu%2C+X">Xi Liu</a>, <a href="/search/eess?searchtype=author&query=Guo%2C+Y">Ying Guo</a>, <a href="/search/eess?searchtype=author&query=Zhen%2C+C">Cheng Zhen</a>, <a href="/search/eess?searchtype=author&query=Li%2C+T">Tong Li</a>, <a href="/search/eess?searchtype=author&query=Ao%2C+Y">Yingying Ao</a>, <a 
href="/search/eess?searchtype=author&query=Yan%2C+P">Pengfei Yan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.00274v2-abstract-short" style="display: inline;"> Listening head generation aims to synthesize a non-verbal responsive listener head by modeling the correlation between the speaker and the listener in dynamic conversion. The applications of listener agent generation in virtual interaction have promoted many works achieving the diverse and fine-grained motion generation. However, they can only manipulate motions through simple emotional labels, but… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.00274v2-abstract-full').style.display = 'inline'; document.getElementById('2403.00274v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.00274v2-abstract-full" style="display: none;"> Listening head generation aims to synthesize a non-verbal responsive listener head by modeling the correlation between the speaker and the listener in dynamic conversion. The applications of listener agent generation in virtual interaction have promoted many works achieving the diverse and fine-grained motion generation. However, they can only manipulate motions through simple emotional labels, but cannot freely control the listener's motions. Since listener agents should have human-like attributes (e.g. identity, personality) which can be freely customized by users, this limits their realism. In this paper, we propose a user-friendly framework called CustomListener to realize the free-form text prior guided listener generation.
To achieve speaker-listener coordination, we design a Static to Dynamic Portrait module (SDP), which interacts with speaker information to transform static text into dynamic portrait token with completion rhythm and amplitude information. To achieve coherence between segments, we design a Past Guided Generation Module (PGG) to maintain the consistency of customized listener attributes through the motion prior, and utilize a diffusion-based structure conditioned on the portrait token and the motion prior to realize the controllable generation. To train and evaluate our model, we have constructed two text-annotated listening head datasets based on ViCo and RealTalk, which provide text-video paired labels. Extensive experiments have verified the effectiveness of our model. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.00274v2-abstract-full').style.display = 'none'; document.getElementById('2403.00274v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by CVPR 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.18781">arXiv:2402.18781</a> <span> [<a href="https://arxiv.org/pdf/2402.18781">pdf</a>, <a href="https://arxiv.org/ps/2402.18781">ps</a>, <a href="https://arxiv.org/format/2402.18781">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Conjectural Online Learning with First-order Beliefs in Asymmetric Information Stochastic Games </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Li%2C+T">Tao Li</a>, <a href="/search/eess?searchtype=author&query=Hammar%2C+K">Kim Hammar</a>, <a href="/search/eess?searchtype=author&query=Stadler%2C+R">Rolf Stadler</a>, <a href="/search/eess?searchtype=author&query=Zhu%2C+Q">Quanyan Zhu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.18781v4-abstract-short" style="display: inline;"> Asymmetric information stochastic games (AISGs) arise in many complex socio-technical systems, such as cyber-physical systems and IT infrastructures. Existing computational methods for AISGs are primarily offline and can not adapt to equilibrium deviations. Further, current methods are limited to particular information structures to avoid belief hierarchies. 
Considering these limitations, we propo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.18781v4-abstract-full').style.display = 'inline'; document.getElementById('2402.18781v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.18781v4-abstract-full" style="display: none;"> Asymmetric information stochastic games (AISGs) arise in many complex socio-technical systems, such as cyber-physical systems and IT infrastructures. Existing computational methods for AISGs are primarily offline and can not adapt to equilibrium deviations. Further, current methods are limited to particular information structures to avoid belief hierarchies. Considering these limitations, we propose conjectural online learning (COL), an online learning method under generic information structures in AISGs. COL uses a forecaster-actor-critic (FAC) architecture, where subjective forecasts are used to conjecture the opponents' strategies within a lookahead horizon, and Bayesian learning is used to calibrate the conjectures. To adapt strategies to nonstationary environments based on information feedback, COL uses online rollout with cost function approximation (actor-critic). We prove that the conjectures produced by COL are asymptotically consistent with the information feedback in the sense of a relaxed Bayesian consistency. We also prove that the empirical strategy profile induced by COL converges to the Berk-Nash equilibrium, a solution concept characterizing rationality under subjectivity. Experimental results from an intrusion response use case demonstrate COL's faster convergence over state-of-the-art reinforcement learning methods against nonstationary attacks.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.18781v4-abstract-full').style.display = 'none'; document.getElementById('2402.18781v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to the 63rd IEEE Conference on Decision and Control, Special Session on Networks, Games and Learning</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.12499">arXiv:2402.12499</a> <span> [<a href="https://arxiv.org/pdf/2402.12499">pdf</a>, <a href="https://arxiv.org/format/2402.12499">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Automated Security Response through Online Learning with Adaptive Conjectures </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Hammar%2C+K">Kim Hammar</a>, <a 
href="/search/eess?searchtype=author&query=Li%2C+T">Tao Li</a>, <a href="/search/eess?searchtype=author&query=Stadler%2C+R">Rolf Stadler</a>, <a href="/search/eess?searchtype=author&query=Zhu%2C+Q">Quanyan Zhu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.12499v3-abstract-short" style="display: inline;"> We study automated security response for an IT infrastructure and formulate the interaction between an attacker and a defender as a partially observed, non-stationary game. We relax the standard assumption that the game model is correctly specified and consider that each player has a probabilistic conjecture about the model, which may be misspecified in the sense that the true model has probabilit… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.12499v3-abstract-full').style.display = 'inline'; document.getElementById('2402.12499v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.12499v3-abstract-full" style="display: none;"> We study automated security response for an IT infrastructure and formulate the interaction between an attacker and a defender as a partially observed, non-stationary game. We relax the standard assumption that the game model is correctly specified and consider that each player has a probabilistic conjecture about the model, which may be misspecified in the sense that the true model has probability 0. This formulation allows us to capture uncertainty and misconception about the infrastructure and the intents of the players. To learn effective game strategies online, we design Conjectural Online Learning (COL), a novel method where a player iteratively adapts its conjecture using Bayesian learning and updates its strategy through rollout. 
We prove that the conjectures converge to best fits, and we provide a bound on the performance improvement that rollout enables with a conjectured model. To characterize the steady state of the game, we propose a variant of the Berk-Nash equilibrium. We present COL through an advanced persistent threat use case. Testbed evaluations show that COL produces effective security strategies that adapt to a changing environment. We also find that COL enables faster convergence than current reinforcement learning techniques. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.12499v3-abstract-full').style.display = 'none'; document.getElementById('2402.12499v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This work has been submitted to the IEEE for possible publication</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.09181">arXiv:2402.09181</a> <span> [<a href="https://arxiv.org/pdf/2402.09181">pdf</a>, <a href="https://arxiv.org/format/2402.09181">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> OmniMedVQA: A New Large-Scale Comprehensive Evaluation Benchmark for Medical LVLM </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Hu%2C+Y">Yutao Hu</a>, <a href="/search/eess?searchtype=author&query=Li%2C+T">Tianbin Li</a>, <a href="/search/eess?searchtype=author&query=Lu%2C+Q">Quanfeng Lu</a>, <a href="/search/eess?searchtype=author&query=Shao%2C+W">Wenqi Shao</a>, <a href="/search/eess?searchtype=author&query=He%2C+J">Junjun He</a>, <a href="/search/eess?searchtype=author&query=Qiao%2C+Y">Yu Qiao</a>, <a href="/search/eess?searchtype=author&query=Luo%2C+P">Ping Luo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.09181v2-abstract-short" style="display: inline;"> Large Vision-Language Models (LVLMs) have demonstrated remarkable capabilities in various multimodal tasks. However, their potential in the medical domain remains largely unexplored. 
A significant challenge arises from the scarcity of diverse medical images spanning various modalities and anatomical regions, which is essential in real-world medical applications. To solve this problem, in this pape… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.09181v2-abstract-full').style.display = 'inline'; document.getElementById('2402.09181v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.09181v2-abstract-full" style="display: none;"> Large Vision-Language Models (LVLMs) have demonstrated remarkable capabilities in various multimodal tasks. However, their potential in the medical domain remains largely unexplored. A significant challenge arises from the scarcity of diverse medical images spanning various modalities and anatomical regions, which is essential in real-world medical applications. To solve this problem, in this paper, we introduce OmniMedVQA, a novel comprehensive medical Visual Question Answering (VQA) benchmark. This benchmark is collected from 73 different medical datasets, including 12 different modalities and covering more than 20 distinct anatomical regions. Importantly, all images in this benchmark are sourced from authentic medical scenarios, ensuring alignment with the requirements of the medical field and suitability for evaluating LVLMs. Through our extensive experiments, we have found that existing LVLMs struggle to address these medical VQA problems effectively. Moreover, what surprises us is that medical-specialized LVLMs even exhibit inferior performance to those general-domain models, calling for a more versatile and robust LVLM in the biomedical field. The evaluation results not only reveal the current limitations of LVLM in understanding real medical images but also highlight our dataset's significance. Our code and dataset are available at https://github.com/OpenGVLab/Multi-Modality-Arena.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.09181v2-abstract-full').style.display = 'none'; document.getElementById('2402.09181v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.05642">arXiv:2402.05642</a> <span> [<a href="https://arxiv.org/pdf/2402.05642">pdf</a>, <a href="https://arxiv.org/ps/2402.05642">ps</a>, <a href="https://arxiv.org/format/2402.05642">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> An Optimization-based Baseline for Rigid 2D/3D Registration Applied to Spine Surgical Navigation Using CMA-ES </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Chen%2C+M">Minheng Chen</a>, <a href="/search/eess?searchtype=author&query=Li%2C+T">Tonglong Li</a>, <a href="/search/eess?searchtype=author&query=Zhang%2C+Z">Zhirun Zhang</a>, <a href="/search/eess?searchtype=author&query=Kong%2C+Y">Youyong Kong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.05642v3-abstract-short" style="display: inline;"> A robust and efficient optimization-based 2D/3D registration framework is crucial 
for the navigation system of orthopedic surgical robots. It can provide precise position information of surgical instruments and implants during surgery. While artificial intelligence technology has advanced rapidly in recent years, traditional optimization-based registration methods remain indispensable in the field… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.05642v3-abstract-full').style.display = 'inline'; document.getElementById('2402.05642v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.05642v3-abstract-full" style="display: none;"> A robust and efficient optimization-based 2D/3D registration framework is crucial for the navigation system of orthopedic surgical robots. It can provide precise position information of surgical instruments and implants during surgery. While artificial intelligence technology has advanced rapidly in recent years, traditional optimization-based registration methods remain indispensable in the field of 2D/3D registration. The exceptional precision of this method enables it to be considered as a post-processing step of the learning-based methods, thereby offering a reliable assurance for registration. In this paper, we present a coarse-to-fine registration framework based on the CMA-ES algorithm. We conducted intensive testing of our method using data from different parts of the spine. The results show the effectiveness of the proposed framework on real orthopedic spine surgery clinical data. This work can be viewed as an additional extension that complements the optimization-based methods employed in our previous studies.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.05642v3-abstract-full').style.display = 'none'; document.getElementById('2402.05642v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.03030">arXiv:2402.03030</a> <span> [<a href="https://arxiv.org/pdf/2402.03030">pdf</a>, <a href="https://arxiv.org/format/2402.03030">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Rejection-Sampled Universal Quantization for Smaller Quantization Errors </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Ling%2C+C+W">Chih Wei Ling</a>, <a href="/search/eess?searchtype=author&query=Li%2C+C+T">Cheuk Ting Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.03030v1-abstract-short" style="display: inline;"> We construct a randomized vector quantizer which has a smaller maximum error compared to all known lattice quantizers with the same entropy for dimensions 5, 6, ..., 48, and also has a smaller mean squared error compared to known lattice quantizers with the same entropy for dimensions 35, ..., 48, in the high resolution limit. 
Moreover, our randomized quantizer has a desirable property that the qu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.03030v1-abstract-full').style.display = 'inline'; document.getElementById('2402.03030v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.03030v1-abstract-full" style="display: none;"> We construct a randomized vector quantizer which has a smaller maximum error compared to all known lattice quantizers with the same entropy for dimensions 5, 6, ..., 48, and also has a smaller mean squared error compared to known lattice quantizers with the same entropy for dimensions 35, ..., 48, in the high resolution limit. Moreover, our randomized quantizer has a desirable property that the quantization error is always uniform over the ball and independent of the input. Our construction is based on applying rejection sampling on universal quantization, which allows us to shape the error distribution to be any continuous distribution, not only uniform distributions over basic cells of a lattice as in conventional dithered quantization. We also characterize the high SNR limit of one-shot channel simulation for any additive noise channel under a mild assumption (e.g., the AWGN channel), up to an additive constant of 1.45 bits. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.03030v1-abstract-full').style.display = 'none'; document.getElementById('2402.03030v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages, 2 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.00080">arXiv:2402.00080</a> <span> [<a href="https://arxiv.org/pdf/2402.00080">pdf</a>, <a href="https://arxiv.org/ps/2402.00080">ps</a>, <a href="https://arxiv.org/format/2402.00080">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Arithmetic Average Density Fusion -- Part IV: Distributed Heterogeneous Fusion of RFS and LRFS Filters via Variational Approximation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Li%2C+T">Tiancheng Li</a>, <a href="/search/eess?searchtype=author&query=Liang%2C+H">Haozhe Liang</a>, <a href="/search/eess?searchtype=author&query=Li%2C+G">Guchong Li</a>, <a href="/search/eess?searchtype=author&query=Herrero%2C+J+G">Jesús García Herrero</a>, <a href="/search/eess?searchtype=author&query=Pan%2C+Q">Quan Pan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.00080v1-abstract-short" style="display: inline;"> This paper, the fourth part of a series of papers on the arithmetic average (AA) density fusion approach and its application for target tracking, addresses the intricate challenge of distributed heterogeneous multisensor multitarget tracking, where each inter-connected sensor operates a probability hypothesis density (PHD) filter, a multiple Bernoulli (MB) filter or a labeled 
MB (LMB) filter and t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.00080v1-abstract-full').style.display = 'inline'; document.getElementById('2402.00080v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.00080v1-abstract-full" style="display: none;"> This paper, the fourth part of a series of papers on the arithmetic average (AA) density fusion approach and its application for target tracking, addresses the intricate challenge of distributed heterogeneous multisensor multitarget tracking, where each inter-connected sensor operates a probability hypothesis density (PHD) filter, a multiple Bernoulli (MB) filter or a labeled MB (LMB) filter and they cooperate with each other via information fusion. Earlier papers in this series have proven that the proper AA fusion of these filters is all exactly built on averaging their respective unlabeled/labeled PHDs. Based on this finding, two PHD-AA fusion approaches are proposed via variational minimization of the upper bound of the Kullback-Leibler divergence between the local and multi-filter averaged PHDs subject to cardinality consensus based on the Gaussian mixture implementation, enabling heterogeneous filter cooperation. One focuses solely on fitting the weights of the local Gaussian components (L-GCs), while the other simultaneously fits all the parameters of the L-GCs at each sensor, both seeking average consensus on the unlabeled PHD, irrespective of the specific posterior form of the local filters. For the distributed peer-to-peer communication, both the classic consensus and flooding paradigms have been investigated. Simulations have demonstrated the effectiveness and flexibility of the proposed approaches in both homogeneous and heterogeneous scenarios. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.00080v1-abstract-full').style.display = 'none'; document.getElementById('2402.00080v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages,14 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.15111">arXiv:2401.15111</a> <span> [<a href="https://arxiv.org/pdf/2401.15111">pdf</a>, <a href="https://arxiv.org/format/2401.15111">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Improving Fairness of Automated Chest X-ray Diagnosis by Contrastive Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Lin%2C+M">Mingquan Lin</a>, <a href="/search/eess?searchtype=author&query=Li%2C+T">Tianhao Li</a>, <a href="/search/eess?searchtype=author&query=Sun%2C+Z">Zhaoyi Sun</a>, <a href="/search/eess?searchtype=author&query=Holste%2C+G">Gregory Holste</a>, <a href="/search/eess?searchtype=author&query=Ding%2C+Y">Ying Ding</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+F">Fei Wang</a>, <a 
href="/search/eess?searchtype=author&query=Shih%2C+G">George Shih</a>, <a href="/search/eess?searchtype=author&query=Peng%2C+Y">Yifan Peng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.15111v1-abstract-short" style="display: inline;"> Purpose: Limited studies exploring concrete methods or approaches to tackle and enhance model fairness in the radiology domain. Our proposed AI model utilizes supervised contrastive learning to minimize bias in CXR diagnosis. Materials and Methods: In this retrospective study, we evaluated our proposed method on two datasets: the Medical Imaging and Data Resource Center (MIDRC) dataset with 77,8… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.15111v1-abstract-full').style.display = 'inline'; document.getElementById('2401.15111v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.15111v1-abstract-full" style="display: none;"> Purpose: Limited studies exploring concrete methods or approaches to tackle and enhance model fairness in the radiology domain. Our proposed AI model utilizes supervised contrastive learning to minimize bias in CXR diagnosis. Materials and Methods: In this retrospective study, we evaluated our proposed method on two datasets: the Medical Imaging and Data Resource Center (MIDRC) dataset with 77,887 CXR images from 27,796 patients collected as of April 20, 2023 for COVID-19 diagnosis, and the NIH Chest X-ray (NIH-CXR) dataset with 112,120 CXR images from 30,805 patients collected between 1992 and 2015. In the NIH-CXR dataset, thoracic abnormalities include atelectasis, cardiomegaly, effusion, infiltration, mass, nodule, pneumonia, pneumothorax, consolidation, edema, emphysema, fibrosis, pleural thickening, or hernia. 
Our proposed method utilizes supervised contrastive learning with carefully selected positive and negative samples to generate fair image embeddings, which are fine-tuned for subsequent tasks to reduce bias in chest X-ray (CXR) diagnosis. We evaluated the methods using the marginal AUC difference ($δ$ mAUC). Results: The proposed model showed a significant decrease in bias across all subgroups when compared to the baseline models, as evidenced by a paired T-test (p&lt;0.0001). The $δ$ mAUC obtained by our method were 0.0116 (95\% CI, 0.0110-0.0123), 0.2102 (95% CI, 0.2087-0.2118), and 0.1000 (95\% CI, 0.0988-0.1011) for sex, race, and age on MIDRC, and 0.0090 (95\% CI, 0.0082-0.0097) for sex and 0.0512 (95% CI, 0.0512-0.0532) for age on NIH-CXR, respectively. Conclusion: Employing supervised contrastive learning can mitigate bias in CXR diagnosis, addressing concerns of fairness and reliability in deep learning-based diagnostic methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.15111v1-abstract-full').style.display = 'none'; document.getElementById('2401.15111v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">23 pages, 5 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> arms.org </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.08956">arXiv:2401.08956</a> <span> [<a href="https://arxiv.org/pdf/2401.08956">pdf</a>, <a href="https://arxiv.org/format/2401.08956">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TAES.2023.3260059">10.1109/TAES.2023.3260059 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> A Unified NOMA Framework in Beam-Hopping Satellite Communication Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Zhang%2C+X">Xuyang Zhang</a>, <a href="/search/eess?searchtype=author&query=Yue%2C+X">Xinwei Yue</a>, <a href="/search/eess?searchtype=author&query=Li%2C+T">Tian Li</a>, <a href="/search/eess?searchtype=author&query=Han%2C+Z">Zhihao Han</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+Y">Yafei Wang</a>, <a href="/search/eess?searchtype=author&query=Ding%2C+Y">Yong Ding</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+R">Rongke Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span 
class="abstract-short has-text-grey-dark mathjax" id="2401.08956v1-abstract-short" style="display: inline;"> This paper investigates the application of a unified non-orthogonal multiple access framework in beam hopping (U-NOMA-BH) based satellite communication systems. More specifically, the proposed U-NOMA-BH framework can be applied to code-domain NOMA based BH (CD-NOMA-BH) and power-domain NOMA based BH (PD-NOMA-BH) systems. To satisfy dynamic-uneven traffic demands, we formulate the optimization prob… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.08956v1-abstract-full').style.display = 'inline'; document.getElementById('2401.08956v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.08956v1-abstract-full" style="display: none;"> This paper investigates the application of a unified non-orthogonal multiple access framework in beam hopping (U-NOMA-BH) based satellite communication systems. More specifically, the proposed U-NOMA-BH framework can be applied to code-domain NOMA based BH (CD-NOMA-BH) and power-domain NOMA based BH (PD-NOMA-BH) systems. To satisfy dynamic-uneven traffic demands, we formulate the optimization problem to minimize the square of discrete difference by jointly optimizing power allocation, carrier assignment and beam scheduling. The non-convexity of the objective function and the constraint condition is solved through Dinkelbach's transform and variable relaxation. As a further development, the closed-form and asymptotic expressions of outage probability are derived for CD/PD-NOMA-BH systems. Based on approximated results, the diversity orders of a pair of users are obtained in detail. In addition, the system throughput of U-NOMA-BH is discussed in delay-limited transmission mode. 
Numerical results verify that: i) The gap between traffic requests of CD/PD-NOMA-BH systems appears to be more closely compared with orthogonal multiple access based BH (OMA-BH); ii) The CD-NOMA-BH system is capable of providing the enhanced traffic request and capacity provision; and iii) The outage behaviors of CD/PD-NOMA-BH are better than that of OMA-BH. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.08956v1-abstract-full').style.display = 'none'; document.getElementById('2401.08956v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Transactions on Aerospace and Electronic Systems, vol. 59, no. 5, pp. 5390-5404, Oct. 
2023 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.05363">arXiv:2401.05363</a> <span> [<a href="https://arxiv.org/pdf/2401.05363">pdf</a>, <a href="https://arxiv.org/format/2401.05363">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Generalizable Sleep Staging via Multi-Level Domain Alignment </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Wang%2C+J">Jiquan Wang</a>, <a href="/search/eess?searchtype=author&query=Zhao%2C+S">Sha Zhao</a>, <a href="/search/eess?searchtype=author&query=Jiang%2C+H">Haiteng Jiang</a>, <a href="/search/eess?searchtype=author&query=Li%2C+S">Shijian Li</a>, <a href="/search/eess?searchtype=author&query=Li%2C+T">Tao Li</a>, <a href="/search/eess?searchtype=author&query=Pan%2C+G">Gang Pan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.05363v4-abstract-short" style="display: inline;"> Automatic sleep staging is essential for sleep assessment and disorder diagnosis. Most existing methods depend on one specific dataset and are limited to be generalized to other unseen datasets, for which the training data and testing data are from the same dataset. 
In this paper, we introduce domain generalization into automatic sleep staging and propose the task of generalizable sleep staging wh… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.05363v4-abstract-full').style.display = 'inline'; document.getElementById('2401.05363v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.05363v4-abstract-full" style="display: none;"> Automatic sleep staging is essential for sleep assessment and disorder diagnosis. Most existing methods depend on one specific dataset and are limited to be generalized to other unseen datasets, for which the training data and testing data are from the same dataset. In this paper, we introduce domain generalization into automatic sleep staging and propose the task of generalizable sleep staging which aims to improve the model generalization ability to unseen datasets. Inspired by existing domain generalization methods, we adopt the feature alignment idea and propose a framework called SleepDG to solve it. Considering both of local salient features and sequential features are important for sleep staging, we propose a Multi-level Feature Alignment combining epoch-level and sequence-level feature alignment to learn domain-invariant feature representations. Specifically, we design an Epoch-level Feature Alignment to align the feature distribution of each single sleep epoch among different domains, and a Sequence-level Feature Alignment to minimize the discrepancy of sequential features among different domains. SleepDG is validated on five public datasets, achieving the state-of-the-art performance. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.05363v4-abstract-full').style.display = 'none'; document.getElementById('2401.05363v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by the Thirty-Eighth AAAI Conference on Artificial Intelligence (AAAI-24)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.00283">arXiv:2401.00283</a> <span> [<a href="https://arxiv.org/pdf/2401.00283">pdf</a>, <a href="https://arxiv.org/format/2401.00283">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Near-Space Communications: the Last Piece of 6G Space-Air-Ground-Sea Integrated Network Puzzle </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/eess?searchtype=author&query=Liu%2C+H">Hongshan Liu</a>, <a href="/search/eess?searchtype=author&query=Qin%2C+T">Tong Qin</a>, <a href="/search/eess?searchtype=author&query=Gao%2C+Z">Zhen Gao</a>, <a href="/search/eess?searchtype=author&query=Mao%2C+T">Tianqi Mao</a>, <a href="/search/eess?searchtype=author&query=Ying%2C+K">Keke Ying</a>, <a href="/search/eess?searchtype=author&query=Wan%2C+Z">Ziwei Wan</a>, <a 
href="/search/eess?searchtype=author&query=Qiao%2C+L">Li Qiao</a>, <a href="/search/eess?searchtype=author&query=Na%2C+R">Rui Na</a>, <a href="/search/eess?searchtype=author&query=Li%2C+Z">Zhongxiang Li</a>, <a href="/search/eess?searchtype=author&query=Hu%2C+C">Chun Hu</a>, <a href="/search/eess?searchtype=author&query=Mei%2C+Y">Yikun Mei</a>, <a href="/search/eess?searchtype=author&query=Li%2C+T">Tuan Li</a>, <a href="/search/eess?searchtype=author&query=Wen%2C+G">Guanghui Wen</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+L">Lei Chen</a>, <a href="/search/eess?searchtype=author&query=Wu%2C+Z">Zhonghuai Wu</a>, <a href="/search/eess?searchtype=author&query=Liu%2C+R">Ruiqi Liu</a>, <a href="/search/eess?searchtype=author&query=Chen%2C+G">Gaojie Chen</a>, <a href="/search/eess?searchtype=author&query=Wang%2C+S">Shuo Wang</a>, <a href="/search/eess?searchtype=author&query=Zheng%2C+D">Dezhi Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.00283v2-abstract-short" style="display: inline;"> This article presents a comprehensive study on the emerging near-space communications (NS-COM) within the context of space-air-ground-sea integrated network (SAGSIN). Specifically, we firstly explore the recent technical developments of NS-COM, followed by the discussions about motivations behind integrating NS-COM into SAGSIN. 
To further demonstrate the necessity of NS-COM, a comparative analysis… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.00283v2-abstract-full').style.display = 'inline'; document.getElementById('2401.00283v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.00283v2-abstract-full" style="display: none;"> This article presents a comprehensive study on the emerging near-space communications (NS-COM) within the context of space-air-ground-sea integrated network (SAGSIN). Specifically, we firstly explore the recent technical developments of NS-COM, followed by the discussions about motivations behind integrating NS-COM into SAGSIN. To further demonstrate the necessity of NS-COM, a comparative analysis between the NS-COM network and other counterparts in SAGSIN is conducted, covering aspects of deployment, coverage, channel characteristics and unique problems of NS-COM network. Afterwards, the technical aspects of NS-COM, including channel modeling, random access, channel estimation, array-based beam management and joint network optimization, are examined in detail. Furthermore, we explore the potential applications of NS-COM, such as structural expansion in SAGSIN communication, civil aviation communication, remote and urgent communication, weather monitoring and carbon neutrality. Finally, some promising research avenues are identified, including stratospheric satellite (StratoSat) -to-ground direct links for mobile terminals, reconfigurable multiple-input multiple-output (MIMO) and holographic MIMO, federated learning in NS-COM networks, maritime communication, electromagnetic spectrum sensing and adversarial game, integrated sensing and communications, StratoSat-based radar detection and imaging, NS-COM assisted enhanced global navigation system, NS-COM assisted intelligent unmanned system and free space optical (FSO) communication. 
Overall, this paper highlights that the NS-COM plays an indispensable role in the SAGSIN puzzle, providing substantial performance and coverage enhancement to the traditional SAGSIN architecture. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.00283v2-abstract-full').style.display = 'none'; document.getElementById('2401.00283v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">28 pages, 8 figures, 2 tables</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Li%2C+T&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Li%2C+T&start=0" class="pagination-link is-current" aria-label="Goto page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Li%2C+T&start=50" class="pagination-link " aria-label="Page 2">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Li%2C+T&start=100" class="pagination-link " aria-label="Page 3">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Li%2C+T&start=150" class="pagination-link " aria-label="Page 4">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Li%2C+T&start=200" class="pagination-link " aria-label="Page 5">5 </a> </li> </ul> </nav> <div 
class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a 
href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 
47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository