Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/>  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b">  <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\$","\$"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a>  <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div>  <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div>  <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 122 results for author: <span class="mathjax">Qiu, M</span> </h1> </div> <div class="level-right is-hidden-mobile">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Qiu%2C+M">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Qiu, M"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Qiu%2C+M&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Qiu, M"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Qiu%2C+M&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Qiu%2C+M&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Qiu%2C+M&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Qiu%2C+M&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.15529">arXiv:2411.15529</a> <span> [<a href="https://arxiv.org/pdf/2411.15529">pdf</a>, <a href="https://arxiv.org/format/2411.15529">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Uplink Multiple Access with Heterogeneous Blocklength and Reliability Constraints: Discrete Signaling with Treating Interference as Noise </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Min Qiu</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yu-Chih Huang</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+J">Jinhong Yuan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.15529v1-abstract-short" style="display: inline;"> We consider the uplink multiple access of heterogeneous users, e.g., ultra-reliable low-latency communications (URLLC) and enhanced mobile broadband (eMBB) users. Each user has its own reliability requirement and blocklength constraint, and users transmitting longer blocks suffer from heterogeneous interference. On top of that, the decoding of URLLC messages cannot leverage successive interference… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.15529v1-abstract-full').style.display = 'inline'; document.getElementById('2411.15529v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.15529v1-abstract-full" style="display: none;"> We consider the uplink multiple access of heterogeneous users, e.g., ultra-reliable low-latency communications (URLLC) and enhanced mobile broadband (eMBB) users. Each user has its own reliability requirement and blocklength constraint, and users transmitting longer blocks suffer from heterogeneous interference. On top of that, the decoding of URLLC messages cannot leverage successive interference cancellation (SIC) owing to the stringent latency requirements. This can significantly degrade the spectral efficiency of all URLLC users when the interference is strong. To overcome this issue, we propose a new multiple access scheme employing discrete signaling and treating interference as noise (TIN) decoding, i.e., without SIC. Specifically, to handle heterogeneous interference while maintaining the single-user encoding and decoding complexities, each user uses a single channel code and maps its coded bits onto sub-blocks of symbols, where the underlying constellations can be different. We demonstrate theoretically and numerically that the proposed scheme employing quadrature amplitude modulations and TIN decoding can perform very close to the benchmark scheme based on Gaussian signaling with perfect SIC decoding. Interestingly, we show that the proposed scheme does not need to use all the transmit power budget, but also can sometimes even outperform the benchmark scheme. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.15529v1-abstract-full').style.display = 'none'; document.getElementById('2411.15529v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages, 7 figures, accepted by IEEE Transactions on Communications. arXiv admin note: text overlap with arXiv:2308.08883</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10915">arXiv:2411.10915</a> <span> [<a href="https://arxiv.org/pdf/2411.10915">pdf</a>, <a href="https://arxiv.org/ps/2411.10915">ps</a>, <a href="https://arxiv.org/format/2411.10915">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Bias in Large Language Models: Origin, Evaluation, and Mitigation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Guo%2C+Y">Yufei Guo</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+M">Muzhe Guo</a>, <a href="/search/cs?searchtype=author&query=Su%2C+J">Juntao Su</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Z">Zhou Yang</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+M">Mengqiu Zhu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+H">Hongfei Li</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Mengyang Qiu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+S+S">Shuo Shuo Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.10915v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) have revolutionized natural language processing, but their susceptibility to biases poses significant challenges. This comprehensive review examines the landscape of bias in LLMs, from its origins to current mitigation strategies. We categorize biases as intrinsic and extrinsic, analyzing their manifestations in various NLP tasks. The review critically assesses a range… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10915v1-abstract-full').style.display = 'inline'; document.getElementById('2411.10915v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.10915v1-abstract-full" style="display: none;"> Large Language Models (LLMs) have revolutionized natural language processing, but their susceptibility to biases poses significant challenges. This comprehensive review examines the landscape of bias in LLMs, from its origins to current mitigation strategies. We categorize biases as intrinsic and extrinsic, analyzing their manifestations in various NLP tasks. The review critically assesses a range of bias evaluation methods, including data-level, model-level, and output-level approaches, providing researchers with a robust toolkit for bias detection. We further explore mitigation strategies, categorizing them into pre-model, intra-model, and post-model techniques, highlighting their effectiveness and limitations. Ethical and legal implications of biased LLMs are discussed, emphasizing potential harms in real-world applications such as healthcare and criminal justice. By synthesizing current knowledge on bias in LLMs, this review contributes to the ongoing effort to develop fair and responsible AI systems. Our work serves as a comprehensive resource for researchers and practitioners working towards understanding, evaluating, and mitigating bias in LLMs, fostering the development of more equitable AI technologies. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10915v1-abstract-full').style.display = 'none'; document.getElementById('2411.10915v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06297">arXiv:2411.06297</a> <span> [<a href="https://arxiv.org/pdf/2411.06297">pdf</a>, <a href="https://arxiv.org/format/2411.06297">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Adaptive Aspect Ratios with Patch-Mixup-ViT-based Vehicle ReID </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Mei Qiu</a>, <a href="/search/cs?searchtype=author&query=Christopher%2C+L+A">Lauren Ann Christopher</a>, <a href="/search/cs?searchtype=author&query=Chien%2C+S">Stanley Chien</a>, <a href="/search/cs?searchtype=author&query=Li%2C+L">Lingxi Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06297v1-abstract-short" style="display: inline;"> Vision Transformers (ViTs) have shown exceptional performance in vehicle re-identification (ReID) tasks. However, non-square aspect ratios of image or video inputs can negatively impact re-identification accuracy. To address this challenge, we propose a novel, human perception driven, and general ViT-based ReID framework that fuses models trained on various aspect ratios. Our key contributions are… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06297v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06297v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06297v1-abstract-full" style="display: none;"> Vision Transformers (ViTs) have shown exceptional performance in vehicle re-identification (ReID) tasks. However, non-square aspect ratios of image or video inputs can negatively impact re-identification accuracy. To address this challenge, we propose a novel, human perception driven, and general ViT-based ReID framework that fuses models trained on various aspect ratios. Our key contributions are threefold: (i) We analyze the impact of aspect ratios on performance using the VeRi-776 and VehicleID datasets, providing guidance for input settings based on the distribution of original image aspect ratios. (ii) We introduce patch-wise mixup strategy during ViT patchification (guided by spatial attention scores) and implement uneven stride for better alignment with object aspect ratios. (iii) We propose a dynamic feature fusion ReID network to enhance model robustness. Our method outperforms state-of-the-art transformer-based approaches on both datasets, with only a minimal increase in inference time per image. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06297v1-abstract-full').style.display = 'none'; document.getElementById('2411.06297v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.21813">arXiv:2410.21813</a> <span> [<a href="https://arxiv.org/pdf/2410.21813">pdf</a>, <a href="https://arxiv.org/format/2410.21813">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> SAM-Swin: SAM-Driven Dual-Swin Transformers with Adaptive Lesion Enhancement for Laryngo-Pharyngeal Tumor Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wei%2C+J">Jia Wei</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yun Li</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+X">Xiaomao Fan</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+W">Wenjun Ma</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Meiyu Qiu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+H">Hongyu Chen</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+W">Wenbin Lei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.21813v1-abstract-short" style="display: inline;"> Laryngo-pharyngeal cancer (LPC) is a highly lethal malignancy in the head and neck region. Recent advancements in tumor detection, particularly through dual-branch network architectures, have significantly improved diagnostic accuracy by integrating global and local feature extraction. However, challenges remain in accurately localizing lesions and fully capitalizing on the complementary nature of… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.21813v1-abstract-full').style.display = 'inline'; document.getElementById('2410.21813v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.21813v1-abstract-full" style="display: none;"> Laryngo-pharyngeal cancer (LPC) is a highly lethal malignancy in the head and neck region. Recent advancements in tumor detection, particularly through dual-branch network architectures, have significantly improved diagnostic accuracy by integrating global and local feature extraction. However, challenges remain in accurately localizing lesions and fully capitalizing on the complementary nature of features within these branches. To address these issues, we propose SAM-Swin, an innovative SAM-driven Dual-Swin Transformer for laryngo-pharyngeal tumor detection. This model leverages the robust segmentation capabilities of the Segment Anything Model 2 (SAM2) to achieve precise lesion segmentation. Meanwhile, we present a multi-scale lesion-aware enhancement module (MS-LAEM) designed to adaptively enhance the learning of nuanced complementary features across various scales, improving the quality of feature extraction and representation. Furthermore, we implement a multi-scale class-aware guidance (CAG) loss that delivers multi-scale targeted supervision, thereby enhancing the model's capacity to extract class-specific features. To validate our approach, we compiled three LPC datasets from the First Affiliated Hospital (FAHSYSU), the Sixth Affiliated Hospital (SAHSYSU) of Sun Yat-sen University, and Nanfang Hospital of Southern Medical University (NHSMU). The FAHSYSU dataset is utilized for internal training, while the SAHSYSU and NHSMU datasets serve for external evaluation. Extensive experiments demonstrate that SAM-Swin outperforms state-of-the-art methods, showcasing its potential for advancing LPC detection and improving patient outcomes. The source code of SAM-Swin is available at the URL of \href{https://github.com/VVJia/SAM-Swin}{https://github.com/VVJia/SAM-Swin}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.21813v1-abstract-full').style.display = 'none'; document.getElementById('2410.21813v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.09817">arXiv:2410.09817</a> <span> [<a href="https://arxiv.org/pdf/2410.09817">pdf</a>, <a href="https://arxiv.org/format/2410.09817">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Reverse Modeling in Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yu%2C+S">Sicheng Yu</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Y">Yuanchen Xu</a>, <a href="/search/cs?searchtype=author&query=Du%2C+C">Cunxiao Du</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Y">Yanying Zhou</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Minghui Qiu</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Q">Qianru Sun</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Hao Zhang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+J">Jiawei Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.09817v1-abstract-short" style="display: inline;"> Humans are accustomed to reading and writing in a forward manner, and this natural bias extends to text understanding in auto-regressive large language models (LLMs). This paper investigates whether LLMs, like humans, struggle with reverse modeling, specifically with reversed text inputs. We found that publicly available pre-trained LLMs cannot understand such inputs. However, LLMs trained from sc… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09817v1-abstract-full').style.display = 'inline'; document.getElementById('2410.09817v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.09817v1-abstract-full" style="display: none;"> Humans are accustomed to reading and writing in a forward manner, and this natural bias extends to text understanding in auto-regressive large language models (LLMs). This paper investigates whether LLMs, like humans, struggle with reverse modeling, specifically with reversed text inputs. We found that publicly available pre-trained LLMs cannot understand such inputs. However, LLMs trained from scratch with both forward and reverse texts can understand them equally well during inference. Our case study shows that different-content texts result in different losses if input (to LLMs) in different directions -- some get lower losses for forward while some for reverse. This leads us to a simple and nice solution for data selection based on the loss differences between forward and reverse directions. Using our selected data in continued pretraining can boost LLMs' performance by a large margin across different language understanding benchmarks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09817v1-abstract-full').style.display = 'none'; document.getElementById('2410.09817v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 Pages, 6 Figures, 7 Tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.01459">arXiv:2409.01459</a> <span> [<a href="https://arxiv.org/pdf/2409.01459">pdf</a>, <a href="https://arxiv.org/format/2409.01459">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> 3D-LSPTM: An Automatic Framework with 3D-Large-Scale Pretrained Model for Laryngeal Cancer Detection Using Laryngoscopic Videos </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Meiyu Qiu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yun Li</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+W">Wenjun Huang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Haoyun Zhang</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+W">Weiping Zheng</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+W">Wenbin Lei</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+X">Xiaomao Fan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.01459v1-abstract-short" style="display: inline;"> Laryngeal cancer is a malignant disease with a high morality rate in otorhinolaryngology, posing an significant threat to human health. Traditionally larygologists manually visual-inspect laryngeal cancer in laryngoscopic videos, which is quite time-consuming and subjective. In this study, we propose a novel automatic framework via 3D-large-scale pretrained models termed 3D-LSPTM for laryngeal can… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.01459v1-abstract-full').style.display = 'inline'; document.getElementById('2409.01459v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.01459v1-abstract-full" style="display: none;"> Laryngeal cancer is a malignant disease with a high morality rate in otorhinolaryngology, posing an significant threat to human health. Traditionally larygologists manually visual-inspect laryngeal cancer in laryngoscopic videos, which is quite time-consuming and subjective. In this study, we propose a novel automatic framework via 3D-large-scale pretrained models termed 3D-LSPTM for laryngeal cancer detection. Firstly, we collect 1,109 laryngoscopic videos from the First Affiliated Hospital Sun Yat-sen University with the approval of the Ethics Committee. Then we utilize the 3D-large-scale pretrained models of C3D, TimeSformer, and Video-Swin-Transformer, with the merit of advanced featuring videos, for laryngeal cancer detection with fine-tuning techniques. Extensive experiments show that our proposed 3D-LSPTM can achieve promising performance on the task of laryngeal cancer detection. Particularly, 3D-LSPTM with the backbone of Video-Swin-Transformer can achieve 92.4% accuracy, 95.6% sensitivity, 94.1% precision, and 94.8% F_1. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.01459v1-abstract-full').style.display = 'none'; document.getElementById('2409.01459v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.05426">arXiv:2408.05426</a> <span> [<a href="https://arxiv.org/pdf/2408.05426">pdf</a>, <a href="https://arxiv.org/format/2408.05426">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> SAM-FNet: SAM-Guided Fusion Network for Laryngo-Pharyngeal Tumor Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wei%2C+J">Jia Wei</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yun Li</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Meiyu Qiu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+H">Hongyu Chen</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+X">Xiaomao Fan</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+W">Wenbin Lei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.05426v2-abstract-short" style="display: inline;"> Laryngo-pharyngeal cancer (LPC) is a highly fatal malignant disease affecting the head and neck region. Previous studies on endoscopic tumor detection, particularly those leveraging dual-branch network architectures, have shown significant advancements in tumor detection. These studies highlight the potential of dual-branch networks in improving diagnostic accuracy by effectively integrating globa… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.05426v2-abstract-full').style.display = 'inline'; document.getElementById('2408.05426v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.05426v2-abstract-full" style="display: none;"> Laryngo-pharyngeal cancer (LPC) is a highly fatal malignant disease affecting the head and neck region. Previous studies on endoscopic tumor detection, particularly those leveraging dual-branch network architectures, have shown significant advancements in tumor detection. These studies highlight the potential of dual-branch networks in improving diagnostic accuracy by effectively integrating global and local (lesion) feature extraction. However, they are still limited in their capabilities to accurately locate the lesion region and capture the discriminative feature information between the global and local branches. To address these issues, we propose a novel SAM-guided fusion network (SAM-FNet), a dual-branch network for laryngo-pharyngeal tumor detection. By leveraging the powerful object segmentation capabilities of the Segment Anything Model (SAM), we introduce the SAM into the SAM-FNet to accurately segment the lesion region. Furthermore, we propose a GAN-like feature optimization (GFO) module to capture the discriminative features between the global and local branches, enhancing the fusion feature complementarity. Additionally, we collect two LPC datasets from the First Affiliated Hospital (FAHSYSU) and the Sixth Affiliated Hospital (SAHSYSU) of Sun Yat-sen University. The FAHSYSU dataset is used as the internal dataset for training the model, while the SAHSYSU dataset is used as the external dataset for evaluating the model's performance. Extensive experiments on both datasets of FAHSYSU and SAHSYSU demonstrate that the SAM-FNet can achieve competitive results, outperforming the state-of-the-art counterparts. The source code of SAM-FNet is available at the URL of https://github.com/VVJia/SAM-FNet. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.05426v2-abstract-full').style.display = 'none'; document.getElementById('2408.05426v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.13863">arXiv:2407.13863</a> <span> [<a href="https://arxiv.org/pdf/2407.13863">pdf</a>, <a href="https://arxiv.org/format/2407.13863">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> A Closer Look at GAN Priors: Exploiting Intermediate Features for Enhanced Model Inversion Attacks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qiu%2C+Y">Yixiang Qiu</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+H">Hao Fang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+H">Hongyao Yu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+B">Bin Chen</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+M">MeiKang Qiu</a>, <a href="/search/cs?searchtype=author&query=Xia%2C+S">Shu-Tao Xia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.13863v4-abstract-short" style="display: inline;"> Model Inversion (MI) attacks aim to reconstruct privacy-sensitive training data from released models by utilizing output information, raising extensive concerns about the security of Deep Neural Networks (DNNs). Recent advances in generative adversarial networks (GANs) have contributed significantly to the improved performance of MI attacks due to their powerful ability to generate realistic image… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.13863v4-abstract-full').style.display = 'inline'; document.getElementById('2407.13863v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.13863v4-abstract-full" style="display: none;"> Model Inversion (MI) attacks aim to reconstruct privacy-sensitive training data from released models by utilizing output information, raising extensive concerns about the security of Deep Neural Networks (DNNs). Recent advances in generative adversarial networks (GANs) have contributed significantly to the improved performance of MI attacks due to their powerful ability to generate realistic images with high fidelity and appropriate semantics. However, previous MI attacks have solely disclosed private information in the latent space of GAN priors, limiting their semantic extraction and transferability across multiple target models and datasets. To address this challenge, we propose a novel method, Intermediate Features enhanced Generative Model Inversion (IF-GMI), which disassembles the GAN structure and exploits features between intermediate blocks. This allows us to extend the optimization space from latent code to intermediate features with enhanced expressive capabilities. To prevent GAN priors from generating unrealistic images, we apply a L1 ball constraint to the optimization process. Experiments on multiple benchmarks demonstrate that our method significantly outperforms previous approaches and achieves state-of-the-art results under various settings, especially in the out-of-distribution (OOD) scenario. Our code is available at: https://github.com/final-solution/IF-GMI <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.13863v4-abstract-full').style.display = 'none'; document.getElementById('2407.13863v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ECCV 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.11356">arXiv:2407.11356</a> <span> [<a href="https://arxiv.org/pdf/2407.11356">pdf</a>, <a href="https://arxiv.org/format/2407.11356">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> The Devil is in the Statistics: Mitigating and Exploiting Statistics Difference for Generalizable Semi-supervised Medical Image Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Muyang Qiu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jian Zhang</a>, <a href="/search/cs?searchtype=author&query=Qi%2C+L">Lei Qi</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+Q">Qian Yu</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+Y">Yinghuan Shi</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+Y">Yang Gao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.11356v2-abstract-short" style="display: inline;"> Despite the recent success of domain generalization in medical image segmentation, voxel-wise annotation for all source domains remains a huge burden. Semi-supervised domain generalization has been proposed very recently to combat this challenge by leveraging limited labeled data along with abundant unlabeled data collected from multiple medical institutions, depending on precisely harnessing unla… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.11356v2-abstract-full').style.display = 'inline'; document.getElementById('2407.11356v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.11356v2-abstract-full" style="display: none;"> Despite the recent success of domain generalization in medical image segmentation, voxel-wise annotation for all source domains remains a huge burden. Semi-supervised domain generalization has been proposed very recently to combat this challenge by leveraging limited labeled data along with abundant unlabeled data collected from multiple medical institutions, depending on precisely harnessing unlabeled data while improving generalization simultaneously. In this work, we observe that domain shifts between medical institutions cause disparate feature statistics, which significantly deteriorates pseudo-label quality due to an unexpected normalization process. Nevertheless, this phenomenon could be exploited to facilitate unseen domain generalization. Therefore, we propose 1) multiple statistics-individual branches to mitigate the impact of domain shifts for reliable pseudo-labels and 2) one statistics-aggregated branch for domain-invariant feature learning. Furthermore, to simulate unseen domains with statistics difference, we approach this from two aspects, i.e., a perturbation with histogram matching at image level and a random batch normalization selection strategy at feature level, producing diverse statistics to expand the training distribution. Evaluation results on three medical image datasets demonstrate the effectiveness of our method compared with recent SOTA methods. The code is available at https://github.com/qiumuyang/SIAB. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.11356v2-abstract-full').style.display = 'none'; document.getElementById('2407.11356v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ECCV 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.10563">arXiv:2407.10563</a> <span> [<a href="https://arxiv.org/pdf/2407.10563">pdf</a>, <a href="https://arxiv.org/format/2407.10563">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Pathformer3D: A 3D Scanpath Transformer for 360掳 Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Quan%2C+R">Rong Quan</a>, <a href="/search/cs?searchtype=author&query=Lai%2C+Y">Yantao Lai</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Mengyu Qiu</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+D">Dong Liang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.10563v1-abstract-short" style="display: inline;"> Scanpath prediction in 360掳 images can help realize rapid rendering and better user interaction in Virtual/Augmented Reality applications. However, existing scanpath prediction models for 360掳 images execute scanpath prediction on 2D equirectangular projection plane, which always result in big computation error owing to the 2D plane's distortion and coordinate discontinuity. In this work, we perfo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.10563v1-abstract-full').style.display = 'inline'; document.getElementById('2407.10563v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.10563v1-abstract-full" style="display: none;"> Scanpath prediction in 360掳 images can help realize rapid rendering and better user interaction in Virtual/Augmented Reality applications. However, existing scanpath prediction models for 360掳 images execute scanpath prediction on 2D equirectangular projection plane, which always result in big computation error owing to the 2D plane's distortion and coordinate discontinuity. In this work, we perform scanpath prediction for 360掳 images in 3D spherical coordinate system and proposed a novel 3D scanpath Transformer named Pathformer3D. Specifically, a 3D Transformer encoder is first used to extract 3D contextual feature representation for the 360掳 image. Then, the contextual feature representation and historical fixation information are input into a Transformer decoder to output current time step's fixation embedding, where the self-attention module is used to imitate the visual working memory mechanism of human visual system and directly model the time dependencies among the fixations. Finally, a 3D Gaussian distribution is learned from each fixation embedding, from which the fixation position can be sampled. Evaluation on four panoramic eye-tracking datasets demonstrates that Pathformer3D outperforms the current state-of-the-art methods. Code is available at https://github.com/lsztzp/Pathformer3D . <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.10563v1-abstract-full').style.display = 'none'; document.getElementById('2407.10563v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ECCV 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.09966">arXiv:2407.09966</a> <span> [<a href="https://arxiv.org/pdf/2407.09966">pdf</a>, <a href="https://arxiv.org/format/2407.09966">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Optimizing ROI Benefits Vehicle ReID in ITS </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Mei Qiu</a>, <a href="/search/cs?searchtype=author&query=Christopher%2C+L+A">Lauren Ann Christopher</a>, <a href="/search/cs?searchtype=author&query=Li%2C+L">Lingxi Li</a>, <a href="/search/cs?searchtype=author&query=Chien%2C+S">Stanley Chien</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yaobin Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.09966v1-abstract-short" style="display: inline;"> Vehicle re-identification (ReID) is a computer vision task that matches the same vehicle across different cameras or viewpoints in a surveillance system. This is crucial for Intelligent Transportation Systems (ITS), where the effectiveness is influenced by the regions from which vehicle images are cropped. This study explores whether optimal vehicle detection regions, guided by detection confidenc… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.09966v1-abstract-full').style.display = 'inline'; document.getElementById('2407.09966v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.09966v1-abstract-full" style="display: none;"> Vehicle re-identification (ReID) is a computer vision task that matches the same vehicle across different cameras or viewpoints in a surveillance system. This is crucial for Intelligent Transportation Systems (ITS), where the effectiveness is influenced by the regions from which vehicle images are cropped. This study explores whether optimal vehicle detection regions, guided by detection confidence scores, can enhance feature matching and ReID tasks. Using our framework with multiple Regions of Interest (ROIs) and lane-wise vehicle counts, we employed YOLOv8 for detection and DeepSORT for tracking across twelve Indiana Highway videos, including two pairs of videos from non-overlapping cameras. Tracked vehicle images were cropped from inside and outside the ROIs at five-frame intervals. Features were extracted using pre-trained models: ResNet50, ResNeXt50, Vision Transformer, and Swin-Transformer. Feature consistency was assessed through cosine similarity, information entropy, and clustering variance. Results showed that features from images cropped inside ROIs had higher mean cosine similarity values compared to those involving one image inside and one outside the ROIs. The most significant difference was observed during night conditions (0.7842 inside vs. 0.5 outside the ROI with Swin-Transformer) and in cross-camera scenarios (0.75 inside-inside vs. 0.52 inside-outside the ROI with Vision Transformer). Information entropy and clustering variance further supported that features in ROIs are more consistent. These findings suggest that strategically selected ROIs can enhance tracking performance and ReID accuracy in ITS. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.09966v1-abstract-full').style.display = 'none'; document.getElementById('2407.09966v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.07842">arXiv:2407.07842</a> <span> [<a href="https://arxiv.org/pdf/2407.07842">pdf</a>, <a href="https://arxiv.org/format/2407.07842">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Study on Aspect Ratio Variability toward Robustness of Vision Transformer-based Vehicle Re-identification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Mei Qiu</a>, <a href="/search/cs?searchtype=author&query=Christopher%2C+L">Lauren Christopher</a>, <a href="/search/cs?searchtype=author&query=Li%2C+L">Lingxi Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.07842v1-abstract-short" style="display: inline;"> Vision Transformers (ViTs) have excelled in vehicle re-identification (ReID) tasks. However, non-square aspect ratios of image or video input might significantly affect the re-identification performance. To address this issue, we propose a novel ViT-based ReID framework in this paper, which fuses models trained on a variety of aspect ratios. Our main contributions are threefold: (i) We analyze asp… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.07842v1-abstract-full').style.display = 'inline'; document.getElementById('2407.07842v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.07842v1-abstract-full" style="display: none;"> Vision Transformers (ViTs) have excelled in vehicle re-identification (ReID) tasks. However, non-square aspect ratios of image or video input might significantly affect the re-identification performance. To address this issue, we propose a novel ViT-based ReID framework in this paper, which fuses models trained on a variety of aspect ratios. Our main contributions are threefold: (i) We analyze aspect ratio performance on VeRi-776 and VehicleID datasets, guiding input settings based on aspect ratios of original images. (ii) We introduce patch-wise mixup intra-image during ViT patchification (guided by spatial attention scores) and implement uneven stride for better object aspect ratio matching. (iii) We propose a dynamic feature fusing ReID network, enhancing model robustness. Our ReID method achieves a significantly improved mean Average Precision (mAP) of 91.0\% compared to the the closest state-of-the-art (CAL) result of 80.9\% on VehicleID dataset. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.07842v1-abstract-full').style.display = 'none'; document.getElementById('2407.07842v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.05610">arXiv:2407.05610</a> <span> [<a href="https://arxiv.org/pdf/2407.05610">pdf</a>, <a href="https://arxiv.org/format/2407.05610">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Described Spatial-Temporal Video Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ji%2C+W">Wei Ji</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xiangyan Liu</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Y">Yingfei Sun</a>, <a href="/search/cs?searchtype=author&query=Deng%2C+J">Jiajun Deng</a>, <a href="/search/cs?searchtype=author&query=Qin%2C+Y">You Qin</a>, <a href="/search/cs?searchtype=author&query=Nuwanna%2C+A">Ammar Nuwanna</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Mengyao Qiu</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+L">Lina Wei</a>, <a href="/search/cs?searchtype=author&query=Zimmermann%2C+R">Roger Zimmermann</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.05610v1-abstract-short" style="display: inline;"> Detecting visual content on language expression has become an emerging topic in the community. However, in the video domain, the existing setting, i.e., spatial-temporal video grounding (STVG), is formulated to only detect one pre-existing object in each frame, ignoring the fact that language descriptions can involve none or multiple entities within a video. In this work, we advance the STVG to a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.05610v1-abstract-full').style.display = 'inline'; document.getElementById('2407.05610v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.05610v1-abstract-full" style="display: none;"> Detecting visual content on language expression has become an emerging topic in the community. However, in the video domain, the existing setting, i.e., spatial-temporal video grounding (STVG), is formulated to only detect one pre-existing object in each frame, ignoring the fact that language descriptions can involve none or multiple entities within a video. In this work, we advance the STVG to a more practical setting called described spatial-temporal video detection (DSTVD) by overcoming the above limitation. To facilitate the exploration of DSTVD, we first introduce a new benchmark, namely DVD-ST. Notably, DVD-ST supports grounding from none to many objects onto the video in response to queries and encompasses a diverse range of over 150 entities, including appearance, actions, locations, and interactions. The extensive breadth and diversity of the DVD-ST dataset make it an exemplary testbed for the investigation of DSTVD. In addition to the new benchmark, we further present two baseline methods for our proposed DSTVD task by extending two representative STVG models, i.e., TubeDETR, and STCAT. These extended models capitalize on tubelet queries to localize and track referred objects across the video sequence. Besides, we adjust the training objectives of these models to optimize spatial and temporal localization accuracy and multi-class classification capabilities. Furthermore, we benchmark the baselines on the introduced DVD-ST dataset and conduct extensive experimental analysis to guide future investigation. Our code and benchmark will be publicly available. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.05610v1-abstract-full').style.display = 'none'; document.getElementById('2407.05610v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.04688">arXiv:2407.04688</a> <span> [<a href="https://arxiv.org/pdf/2407.04688">pdf</a>, <a href="https://arxiv.org/format/2407.04688">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Enhancing Vehicle Re-identification and Matching for Weaving Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Mei Qiu</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+W">Wei Lin</a>, <a href="/search/cs?searchtype=author&query=Chien%2C+S">Stanley Chien</a>, <a href="/search/cs?searchtype=author&query=Christopher%2C+L">Lauren Christopher</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yaobin Chen</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+S">Shu Hu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.04688v1-abstract-short" style="display: inline;"> Vehicle weaving on highways contributes to traffic congestion, raises safety issues, and underscores the need for sophisticated traffic management systems. Current tools are inadequate in offering precise and comprehensive data on lane-specific weaving patterns. This paper introduces an innovative method for collecting non-overlapping video data in weaving zones, enabling the generation of quantit… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.04688v1-abstract-full').style.display = 'inline'; document.getElementById('2407.04688v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.04688v1-abstract-full" style="display: none;"> Vehicle weaving on highways contributes to traffic congestion, raises safety issues, and underscores the need for sophisticated traffic management systems. Current tools are inadequate in offering precise and comprehensive data on lane-specific weaving patterns. This paper introduces an innovative method for collecting non-overlapping video data in weaving zones, enabling the generation of quantitative insights into lane-specific weaving behaviors. Our experimental results confirm the efficacy of this approach, delivering critical data that can assist transportation authorities in enhancing traffic control and roadway infrastructure. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.04688v1-abstract-full').style.display = 'none'; document.getElementById('2407.04688v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.00748">arXiv:2407.00748</a> <span> [<a href="https://arxiv.org/pdf/2407.00748">pdf</a>, <a href="https://arxiv.org/format/2407.00748">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3637528.3671737">10.1145/3637528.3671737 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Self-consistent Deep Geometric Learning for Heterogeneous Multi-source Spatial Point Data Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yu%2C+D">Dazhou Yu</a>, <a href="/search/cs?searchtype=author&query=Gong%2C+X">Xiaoyun Gong</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yun Li</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Meikang Qiu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+L">Liang Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.00748v1-abstract-short" style="display: inline;"> Multi-source spatial point data prediction is crucial in fields like environmental monitoring and natural resource management, where integrating data from various sensors is the key to achieving a holistic environmental understanding. Existing models in this area often fall short due to their domain-specific nature and lack a strategy for integrating information from various sources in the absence… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.00748v1-abstract-full').style.display = 'inline'; document.getElementById('2407.00748v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.00748v1-abstract-full" style="display: none;"> Multi-source spatial point data prediction is crucial in fields like environmental monitoring and natural resource management, where integrating data from various sensors is the key to achieving a holistic environmental understanding. Existing models in this area often fall short due to their domain-specific nature and lack a strategy for integrating information from various sources in the absence of ground truth labels. Key challenges include evaluating the quality of different data sources and modeling spatial relationships among them effectively. Addressing these issues, we introduce an innovative multi-source spatial point data prediction framework that adeptly aligns information from varied sources without relying on ground truth labels. A unique aspect of our method is the 'fidelity score,' a quantitative measure for evaluating the reliability of each data source. Furthermore, we develop a geo-location-aware graph neural network tailored to accurately depict spatial relationships between data points. Our framework has been rigorously tested on two real-world datasets and one synthetic dataset. The results consistently demonstrate its superior performance over existing state-of-the-art methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.00748v1-abstract-full').style.display = 'none'; document.getElementById('2407.00748v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.09317">arXiv:2406.09317</a> <span> [<a href="https://arxiv.org/pdf/2406.09317">pdf</a>, <a href="https://arxiv.org/format/2406.09317">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Common and Rare Fundus Diseases Identification Using Vision-Language Foundation Model with Knowledge of Over 400 Diseases </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+M">Meng Wang</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+T">Tian Lin</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+A">Aidi Lin</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+K">Kai Yu</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+Y">Yuanyuan Peng</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+L">Lianyu Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+C">Cheng Chen</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+K">Ke Zou</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+H">Huiyu Liang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+M">Man Chen</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xue Yao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+M">Meiqin Zhang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+B">Binwei Huang</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+C">Chaoxin Zheng</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+P">Peixin Zhang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+W">Wei Chen</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+Y">Yilong Luo</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yifan Chen</a>, <a href="/search/cs?searchtype=author&query=Xia%2C+H">Honghe Xia</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+T">Tingkun Shi</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Q">Qi Zhang</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+J">Jinming Guo</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xiaolin Chen</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jingcheng Wang</a>, <a href="/search/cs?searchtype=author&query=Tham%2C+Y+C">Yih Chung Tham</a> , et al. (24 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.09317v2-abstract-short" style="display: inline;"> Previous foundation models for retinal images were pre-trained with limited disease categories and knowledge base. Here we introduce RetiZero, a vision-language foundation model that leverages knowledge from over 400 fundus diseases. To RetiZero's pre-training, we compiled 341,896 fundus images paired with text descriptions, sourced from public datasets, ophthalmic literature, and online resources… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.09317v2-abstract-full').style.display = 'inline'; document.getElementById('2406.09317v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.09317v2-abstract-full" style="display: none;"> Previous foundation models for retinal images were pre-trained with limited disease categories and knowledge base. Here we introduce RetiZero, a vision-language foundation model that leverages knowledge from over 400 fundus diseases. To RetiZero's pre-training, we compiled 341,896 fundus images paired with text descriptions, sourced from public datasets, ophthalmic literature, and online resources, encompassing a diverse range of diseases across multiple ethnicities and countries. RetiZero exhibits superior performance in several downstream tasks, including zero-shot disease recognition, image-to-image retrieval, and internal- and cross-domain disease identification. In zero-shot scenarios, RetiZero achieves Top5 accuracy scores of 0.8430 for 15 fundus diseases and 0.7561 for 52 fundus diseases. For image retrieval, it achieves Top5 scores of 0.9500 and 0.8860 for the same disease sets, respectively. Clinical evaluations show that RetiZero's Top3 zero-shot performance surpasses the average of 19 ophthalmologists from Singapore, China and the United States. Furthermore, RetiZero significantly enhances clinicians' accuracy in diagnosing fundus disease. These findings underscore the value of integrating the RetiZero foundation model into clinical settings, where a variety of fundus diseases are encountered. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.09317v2-abstract-full').style.display = 'none'; document.getElementById('2406.09317v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.05704">arXiv:2406.05704</a> <span> [<a href="https://arxiv.org/pdf/2406.05704">pdf</a>, <a href="https://arxiv.org/format/2406.05704">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Hierarchical Features Matter: A Deep Exploration of GAN Priors for Improved Dataset Distillation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhong%2C+X">Xinhao Zhong</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+H">Hao Fang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+B">Bin Chen</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+X">Xulin Gu</a>, <a href="/search/cs?searchtype=author&query=Dai%2C+T">Tao Dai</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Meikang Qiu</a>, <a href="/search/cs?searchtype=author&query=Xia%2C+S">Shu-Tao Xia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.05704v2-abstract-short" style="display: inline;"> Dataset distillation is an emerging dataset reduction method, which condenses large-scale datasets while maintaining task accuracy. Current methods have integrated parameterization techniques to boost synthetic dataset performance by shifting the optimization space from pixel to another informative feature domain. However, they limit themselves to a fixed optimization space for distillation, negle… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.05704v2-abstract-full').style.display = 'inline'; document.getElementById('2406.05704v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.05704v2-abstract-full" style="display: none;"> Dataset distillation is an emerging dataset reduction method, which condenses large-scale datasets while maintaining task accuracy. Current methods have integrated parameterization techniques to boost synthetic dataset performance by shifting the optimization space from pixel to another informative feature domain. However, they limit themselves to a fixed optimization space for distillation, neglecting the diverse guidance across different informative latent spaces. To overcome this limitation, we propose a novel parameterization method dubbed Hierarchical Generative Latent Distillation (H-GLaD), to systematically explore hierarchical layers within the generative adversarial networks (GANs). This allows us to progressively span from the initial latent space to the final pixel space. In addition, we introduce a novel class-relevant feature distance metric to alleviate the computational burden associated with synthetic dataset evaluation, bridging the gap between synthetic and original datasets. Experimental results demonstrate that the proposed H-GLaD achieves a significant improvement in both same-architecture and cross-architecture performance with equivalent time consumption. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.05704v2-abstract-full').style.display = 'none'; document.getElementById('2406.05704v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.16919">arXiv:2405.16919</a> <span> [<a href="https://arxiv.org/pdf/2405.16919">pdf</a>, <a href="https://arxiv.org/format/2405.16919">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> VoCoT: Unleashing Visually Grounded Multi-Step Reasoning in Large Multi-Modal Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zejun Li</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+R">Ruipu Luo</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jiwen Zhang</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Minghui Qiu</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+Z">Zhongyu Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.16919v2-abstract-short" style="display: inline;"> While large multi-modal models (LMMs) have exhibited impressive capabilities across diverse tasks, their effectiveness in handling complex tasks has been limited by the prevailing single-step reasoning paradigm. To this end, this paper proposes VoCoT, a multi-step Visually grounded object-centric Chain-of-Thought reasoning framework tailored for inference with LMMs. VoCoT is characterized by two k… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.16919v2-abstract-full').style.display = 'inline'; document.getElementById('2405.16919v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.16919v2-abstract-full" style="display: none;"> While large multi-modal models (LMMs) have exhibited impressive capabilities across diverse tasks, their effectiveness in handling complex tasks has been limited by the prevailing single-step reasoning paradigm. To this end, this paper proposes VoCoT, a multi-step Visually grounded object-centric Chain-of-Thought reasoning framework tailored for inference with LMMs. VoCoT is characterized by two key features: (1) object-centric reasoning paths that revolve around cross-modal shared object-level information, and (2) visually grounded representation of object concepts in a multi-modal interleaved and aligned manner, which effectively bridges the modality gap within LMMs during long-term generation. Additionally, we construct an instruction dataset to facilitate LMMs in adapting to reasoning with VoCoT. By introducing VoCoT into the prevalent open-source LMM architecture, we introduce VolCano. With only 7B parameters and limited input resolution, VolCano demonstrates excellent performance across various scenarios, surpassing SOTA models, including GPT-4V, in tasks requiring complex reasoning. Our code, data and model will be available at https://github.com/RupertLuo/VoCoT. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.16919v2-abstract-full').style.display = 'none'; document.getElementById('2405.16919v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.07547">arXiv:2405.07547</a> <span> [<a href="https://arxiv.org/pdf/2405.07547">pdf</a>, <a href="https://arxiv.org/format/2405.07547">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Channel Coding Toward 6G: Technical Overview and Outlook </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Rowshan%2C+M">Mohammad Rowshan</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Min Qiu</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+Y">Yixuan Xie</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+X">Xinyi Gu</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+J">Jinhong Yuan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.07547v1-abstract-short" style="display: inline;"> Channel coding plays a pivotal role in ensuring reliable communication over wireless channels. With the growing need for ultra-reliable communication in emerging wireless use cases, the significance of channel coding has amplified. Furthermore, minimizing decoding latency is crucial for critical-mission applications, while optimizing energy efficiency is paramount for mobile and the Internet of Th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.07547v1-abstract-full').style.display = 'inline'; document.getElementById('2405.07547v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.07547v1-abstract-full" style="display: none;"> Channel coding plays a pivotal role in ensuring reliable communication over wireless channels. With the growing need for ultra-reliable communication in emerging wireless use cases, the significance of channel coding has amplified. Furthermore, minimizing decoding latency is crucial for critical-mission applications, while optimizing energy efficiency is paramount for mobile and the Internet of Things (IoT) communications. As the fifth generation (5G) of mobile communications is currently in operation and 5G-advanced is on the horizon, the objective of this paper is to assess prominent channel coding schemes in the context of recent advancements and the anticipated requirements for the sixth generation (6G). In this paper, after considering the potential impact of channel coding on key performance indicators (KPIs) of wireless networks, we review the evolution of mobile communication standards and the organizations involved in the standardization, from the first generation (1G) to the current 5G, highlighting the technologies integral to achieving targeted KPIs such as reliability, data rate, latency, energy efficiency, spectral efficiency, connection density, and traffic capacity. Following this, we delve into the anticipated requirements for potential use cases in 6G. The subsequent sections of the paper focus on a comprehensive review of three primary coding schemes utilized in past generations and their recent advancements: low-density parity-check (LDPC) codes, turbo codes (including convolutional codes), polar codes (alongside Reed-Muller codes). Additionally, we examine alternative coding schemes like Fountain codes and sparse regression codes. Our evaluation includes a comparative analysis of error correction performance and the performance of hardware implementation for these coding schemes, providing insights into their potential and suitability for the upcoming 6G era. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.07547v1-abstract-full').style.display = 'none'; document.getElementById('2405.07547v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">102 pages, 87 figures, IEEE Open Journal of the Communications Society (invited paper)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.15212">arXiv:2404.15212</a> <span> [<a href="https://arxiv.org/pdf/2404.15212">pdf</a>, <a href="https://arxiv.org/format/2404.15212">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Real-time Lane-wise Traffic Monitoring in Optimal ROIs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Mei Qiu</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+W">Wei Lin</a>, <a href="/search/cs?searchtype=author&query=Christopher%2C+L+A">Lauren Ann Christopher</a>, <a href="/search/cs?searchtype=author&query=Chien%2C+S">Stanley Chien</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yaobin Chen</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+S">Shu Hu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.15212v1-abstract-short" style="display: inline;"> In the US, thousands of Pan, Tilt, and Zoom (PTZ) traffic cameras monitor highway conditions. There is a great interest in using these highway cameras to gather valuable road traffic data to support traffic analysis and decision-making for highway safety and efficient traffic management. However, there are too many cameras for a few human traffic operators to effectively monitor, so a fully automa… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.15212v1-abstract-full').style.display = 'inline'; document.getElementById('2404.15212v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.15212v1-abstract-full" style="display: none;"> In the US, thousands of Pan, Tilt, and Zoom (PTZ) traffic cameras monitor highway conditions. There is a great interest in using these highway cameras to gather valuable road traffic data to support traffic analysis and decision-making for highway safety and efficient traffic management. However, there are too many cameras for a few human traffic operators to effectively monitor, so a fully automated solution is desired. This paper introduces a novel system that learns the locations of highway lanes and traffic directions from these camera feeds automatically. It collects real-time, lane-specific traffic data continuously, even adjusting for changes in camera angle or zoom. This facilitates efficient traffic analysis, decision-making, and improved highway safety. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.15212v1-abstract-full').style.display = 'none'; document.getElementById('2404.15212v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.04861">arXiv:2404.04861</a> <span> [<a href="https://arxiv.org/pdf/2404.04861">pdf</a>, <a href="https://arxiv.org/format/2404.04861">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Privacy-Preserving Traceable Functional Encryption for Inner Product </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Muyao Qiu</a>, <a href="/search/cs?searchtype=author&query=Han%2C+J">Jinguang Han</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.04861v2-abstract-short" style="display: inline;"> Functional encryption introduces a new paradigm of public key encryption that decryption only reveals the function value of encrypted data. To curb key leakage issues and trace users in FE-IP, a new primitive called traceable functional encryption for inner product (TFE-IP) has been proposed. However, the privacy protection of user's identities has not been considered in the existing TFE-IP scheme… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.04861v2-abstract-full').style.display = 'inline'; document.getElementById('2404.04861v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.04861v2-abstract-full" style="display: none;"> Functional encryption introduces a new paradigm of public key encryption that decryption only reveals the function value of encrypted data. To curb key leakage issues and trace users in FE-IP, a new primitive called traceable functional encryption for inner product (TFE-IP) has been proposed. However, the privacy protection of user's identities has not been considered in the existing TFE-IP schemes. In order to balance privacy and accountability, we propose the concept of privacy-preserving traceable functional encryption for inner product (PPTFE-IP) and give a concrete construction. Our scheme provides the following features: (1) To prevent key sharing, a user's key is bound with both his/her identity and a vector; (2) The key generation center (KGC) and a user execute a two-party secure computing protocol to generate a key without the former knowing anything about the latter's identity; (3) Each user can verify the correctness of his/her key; (4) A user can calculate the inner product of the two vectors embedded in his/her key and in a ciphertext; (5) Only the tracer can trace the identity embedded in a key. The security of our scheme is formally reduced to well-known complexity assumptions, and the implementation is conducted to evaluate its efficiency. The novelty of our scheme is to protect users' privacy and provide traceability if required. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.04861v2-abstract-full').style.display = 'none'; document.getElementById('2404.04861v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.14922">arXiv:2403.14922</a> <span> [<a href="https://arxiv.org/pdf/2403.14922">pdf</a>, <a href="https://arxiv.org/format/2403.14922">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> CODA: A COst-efficient Test-time Domain Adaptation Mechanism for HAR </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Minghui Qiu</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yandao Huang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+L">Lin Chen</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+L">Lu Wang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+K">Kaishun Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.14922v1-abstract-short" style="display: inline;"> In recent years, emerging research on mobile sensing has led to novel scenarios that enhance daily life for humans, but dynamic usage conditions often result in performance degradation when systems are deployed in real-world settings. Existing solutions typically employ one-off adaptation schemes based on neural networks, which struggle to ensure robustness against uncertain drifting conditions in… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.14922v1-abstract-full').style.display = 'inline'; document.getElementById('2403.14922v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.14922v1-abstract-full" style="display: none;"> In recent years, emerging research on mobile sensing has led to novel scenarios that enhance daily life for humans, but dynamic usage conditions often result in performance degradation when systems are deployed in real-world settings. Existing solutions typically employ one-off adaptation schemes based on neural networks, which struggle to ensure robustness against uncertain drifting conditions in human-centric sensing scenarios. In this paper, we propose CODA, a COst-efficient Domain Adaptation mechanism for mobile sensing that addresses real-time drifts from the data distribution perspective with active learning theory, ensuring cost-efficient adaptation directly on the device. By incorporating a clustering loss and importance-weighted active learning algorithm, CODA retains the relationship between different clusters during cost-effective instance-level updates, preserving meaningful structure within the data distribution. We also showcase its generalization by seamlessly integrating it with Neural Network-based solutions for Human Activity Recognition tasks. Through meticulous evaluations across diverse datasets, including phone-based, watch-based, and integrated sensor-based sensing tasks, we demonstrate the feasibility and potential of online adaptation with CODA. The promising results achieved by CODA, even without learnable parameters, also suggest the possibility of realizing unobtrusive adaptation through specific application designs with sufficient feedback. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.14922v1-abstract-full').style.display = 'none'; document.getElementById('2403.14922v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.17613">arXiv:2402.17613</a> <span> [<a href="https://arxiv.org/pdf/2402.17613">pdf</a>, <a href="https://arxiv.org/format/2402.17613">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Neural Automated Writing Evaluation with Corrective Feedback </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+I+X">Izia Xiaoxiao Wang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+X">Xihan Wu</a>, <a href="/search/cs?searchtype=author&query=Coates%2C+E">Edith Coates</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+M">Min Zeng</a>, <a href="/search/cs?searchtype=author&query=Kuang%2C+J">Jiexin Kuang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+S">Siliang Liu</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Mengyang Qiu</a>, <a href="/search/cs?searchtype=author&query=Park%2C+J">Jungyeul Park</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.17613v2-abstract-short" style="display: inline;"> The utilization of technology in second language learning and teaching has become ubiquitous. For the assessment of writing specifically, automated writing evaluation (AWE) and grammatical error correction (GEC) have become immensely popular and effective methods for enhancing writing proficiency and delivering instant and individualized feedback to learners. By leveraging the power of natural lan… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.17613v2-abstract-full').style.display = 'inline'; document.getElementById('2402.17613v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.17613v2-abstract-full" style="display: none;"> The utilization of technology in second language learning and teaching has become ubiquitous. For the assessment of writing specifically, automated writing evaluation (AWE) and grammatical error correction (GEC) have become immensely popular and effective methods for enhancing writing proficiency and delivering instant and individualized feedback to learners. By leveraging the power of natural language processing (NLP) and machine learning algorithms, AWE and GEC systems have been developed separately to provide language learners with automated corrective feedback and more accurate and unbiased scoring that would otherwise be subject to examiners. In this paper, we propose an integrated system for automated writing evaluation with corrective feedback as a means of bridging the gap between AWE and GEC results for second language learners. This system enables language learners to simulate the essay writing tests: a student writes and submits an essay, and the system returns the assessment of the writing along with suggested grammatical error corrections. Given that automated scoring and grammatical correction are more efficient and cost-effective than human grading, this integrated system would also alleviate the burden of manually correcting innumerable essays. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.17613v2-abstract-full').style.display = 'none'; document.getElementById('2402.17613v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Supported by the SoTL Seed Program at UBC</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.15931">arXiv:2402.15931</a> <span> [<a href="https://arxiv.org/pdf/2402.15931">pdf</a>, <a href="https://arxiv.org/ps/2402.15931">ps</a>, <a href="https://arxiv.org/format/2402.15931">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Frustratingly Simple Prompting-based Text Denoising </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Park%2C+J">Jungyeul Park</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Mengyang Qiu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.15931v1-abstract-short" style="display: inline;"> This paper introduces a novel perspective on the automated essay scoring (AES) task, challenging the conventional view of the ASAP dataset as a static entity. Employing simple text denoising techniques using prompting, we explore the dynamic potential within the dataset. While acknowledging the previous emphasis on building regression systems, our paper underscores how making minor changes to a da… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.15931v1-abstract-full').style.display = 'inline'; document.getElementById('2402.15931v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.15931v1-abstract-full" style="display: none;"> This paper introduces a novel perspective on the automated essay scoring (AES) task, challenging the conventional view of the ASAP dataset as a static entity. Employing simple text denoising techniques using prompting, we explore the dynamic potential within the dataset. While acknowledging the previous emphasis on building regression systems, our paper underscores how making minor changes to a dataset through text denoising can enhance the final results. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.15931v1-abstract-full').style.display = 'none'; document.getElementById('2402.15931v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published as a Tiny Paper at ICLR 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.15930">arXiv:2402.15930</a> <span> [<a href="https://arxiv.org/pdf/2402.15930">pdf</a>, <a href="https://arxiv.org/ps/2402.15930">ps</a>, <a href="https://arxiv.org/format/2402.15930">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Evaluating Prompting Strategies for Grammatical Error Correction Based on Language Proficiency </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zeng%2C+M">Min Zeng</a>, <a href="/search/cs?searchtype=author&query=Kuang%2C+J">Jiexin Kuang</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Mengyang Qiu</a>, <a href="/search/cs?searchtype=author&query=Song%2C+J">Jayoung Song</a>, <a href="/search/cs?searchtype=author&query=Park%2C+J">Jungyeul Park</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.15930v1-abstract-short" style="display: inline;"> The writing examples of English language learners may be different from those of native speakers. Given that there is a significant differences in second language (L2) learners' error types by their proficiency levels, this paper attempts to reduce overcorrection by examining the interaction between LLM's performance and L2 language proficiency. Our method focuses on zero-shot and few-shot prompti… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.15930v1-abstract-full').style.display = 'inline'; document.getElementById('2402.15930v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.15930v1-abstract-full" style="display: none;"> The writing examples of English language learners may be different from those of native speakers. Given that there is a significant differences in second language (L2) learners' error types by their proficiency levels, this paper attempts to reduce overcorrection by examining the interaction between LLM's performance and L2 language proficiency. Our method focuses on zero-shot and few-shot prompting and fine-tuning models for GEC for learners of English as a foreign language based on the different proficiency. We investigate GEC results and find that overcorrection happens primarily in advanced language learners' writing (proficiency C) rather than proficiency A (a beginner level) and proficiency B (an intermediate level). Fine-tuned LLMs, and even few-shot prompting with writing examples of English learners, actually tend to exhibit decreased recall measures. To make our claim concrete, we conduct a comprehensive examination of GEC outcomes and their evaluation results based on language proficiency. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.15930v1-abstract-full').style.display = 'none'; document.getElementById('2402.15930v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear in LREC-COLING 2024, short paper (preprint)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.15521">arXiv:2402.15521</a> <span> [<a href="https://arxiv.org/pdf/2402.15521">pdf</a>, <a href="https://arxiv.org/format/2402.15521">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> HKD-SHO: A hybrid smart home system based on knowledge-based and data-driven services </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Mingming Qiu</a>, <a href="/search/cs?searchtype=author&query=Najm%2C+E">Elie Najm</a>, <a href="/search/cs?searchtype=author&query=Sharrock%2C+R">R茅mi Sharrock</a>, <a href="/search/cs?searchtype=author&query=Traverson%2C+B">Bruno Traverson</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.15521v1-abstract-short" style="display: inline;"> A smart home is realized by setting up various services. Several methods have been proposed to create smart home services, which can be divided into knowledge-based and data-driven approaches. However, knowledge-based approaches usually require manual input from the inhabitant, which can be complicated if the physical phenomena of the concerned environment states are complex, and the inhabitant do… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.15521v1-abstract-full').style.display = 'inline'; document.getElementById('2402.15521v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.15521v1-abstract-full" style="display: none;"> A smart home is realized by setting up various services. Several methods have been proposed to create smart home services, which can be divided into knowledge-based and data-driven approaches. However, knowledge-based approaches usually require manual input from the inhabitant, which can be complicated if the physical phenomena of the concerned environment states are complex, and the inhabitant does not know how to adjust related actuators to achieve the target values of the states monitored by services. Moreover, machine learning-based data-driven approaches that we are interested in are like black boxes and cannot show the inhabitant in which situations certain services proposed certain actuators' states. To solve these problems, we propose a hybrid system called HKD-SHO (Hybrid Knowledge-based and Data-driven services based Smart HOme system), where knowledge-based and machine learning-based data-driven services are profitably integrated. The principal advantage is that it inherits the explicability of knowledge-based services and the dynamism of data-driven services. We compare HKD-SHO with several systems for creating dynamic smart home services, and the results show the better performance of HKD-SHO. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.15521v1-abstract-full').style.display = 'none'; document.getElementById('2402.15521v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">keywords: Hybrid System, Knowledge Representation, Reinforcement Learning, Services, Smart Home</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.11058">arXiv:2401.11058</a> <span> [<a href="https://arxiv.org/pdf/2401.11058">pdf</a>, <a href="https://arxiv.org/ps/2401.11058">ps</a>, <a href="https://arxiv.org/format/2401.11058">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Low Complexity Turbo SIC-MMSE Detection for Orthogonal Time Frequency Space Modulation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+Q">Qi Li</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+J">Jinhong Yuan</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Min Qiu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S">Shuangyang Li</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+Y">Yixuan Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.11058v1-abstract-short" style="display: inline;"> Recently, orthogonal time frequency space (OTFS) modulation has garnered considerable attention due to its robustness against doubly-selective wireless channels. In this paper, we propose a low-complexity iterative successive interference cancellation based minimum mean squared error (SIC-MMSE) detection algorithm for zero-padded OTFS (ZP-OTFS) modulation. In the proposed algorithm, signals are de… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.11058v1-abstract-full').style.display = 'inline'; document.getElementById('2401.11058v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.11058v1-abstract-full" style="display: none;"> Recently, orthogonal time frequency space (OTFS) modulation has garnered considerable attention due to its robustness against doubly-selective wireless channels. In this paper, we propose a low-complexity iterative successive interference cancellation based minimum mean squared error (SIC-MMSE) detection algorithm for zero-padded OTFS (ZP-OTFS) modulation. In the proposed algorithm, signals are detected based on layers processed by multiple SIC-MMSE linear filters for each sub-channel, with interference on the targeted signal layer being successively canceled either by hard or soft information. To reduce the complexity of computing individual layer filter coefficients, we also propose a novel filter coefficients recycling approach in place of generating the exact form of MMSE filter weights. Moreover, we design a joint detection and decoding algorithm for ZP-OTFS to enhance error performance. Compared to the conventional SIC-MMSE detection, our proposed algorithms outperform other linear detectors, e.g., maximal ratio combining (MRC), for ZP-OTFS with up to 3 dB gain while maintaining comparable computation complexity. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.11058v1-abstract-full').style.display = 'none'; document.getElementById('2401.11058v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages, 12 figures, accepted by IEEE Transactions on Communications</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.01433">arXiv:2401.01433</a> <span> [<a href="https://arxiv.org/pdf/2401.01433">pdf</a>, <a href="https://arxiv.org/format/2401.01433">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Multiple Access Techniques for Intelligent and Multi-Functional 6G: Tutorial, Survey, and Outlook </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Clerckx%2C+B">Bruno Clerckx</a>, <a href="/search/cs?searchtype=author&query=Mao%2C+Y">Yijie Mao</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Z">Zhaohui Yang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+M">Mingzhe Chen</a>, <a href="/search/cs?searchtype=author&query=Alkhateeb%2C+A">Ahmed Alkhateeb</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+L">Liang Liu</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Min Qiu</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+J">Jinhong Yuan</a>, <a href="/search/cs?searchtype=author&query=Wong%2C+V+W+S">Vincent W. S. Wong</a>, <a href="/search/cs?searchtype=author&query=Montojo%2C+J">Juan Montojo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.01433v1-abstract-short" style="display: inline;"> Multiple access (MA) is a crucial part of any wireless system and refers to techniques that make use of the resource dimensions to serve multiple users/devices/machines/services, ideally in the most efficient way. Given the needs of multi-functional wireless networks for integrated communications, sensing, localization, computing, coupled with the surge of machine learning / artificial intelligenc… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.01433v1-abstract-full').style.display = 'inline'; document.getElementById('2401.01433v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.01433v1-abstract-full" style="display: none;"> Multiple access (MA) is a crucial part of any wireless system and refers to techniques that make use of the resource dimensions to serve multiple users/devices/machines/services, ideally in the most efficient way. Given the needs of multi-functional wireless networks for integrated communications, sensing, localization, computing, coupled with the surge of machine learning / artificial intelligence (AI) in wireless networks, MA techniques are expected to experience a paradigm shift in 6G and beyond. In this paper, we provide a tutorial, survey and outlook of past, emerging and future MA techniques and pay a particular attention to how wireless network intelligence and multi-functionality will lead to a re-thinking of those techniques. The paper starts with an overview of orthogonal, physical layer multicasting, space domain, power domain, ratesplitting, code domain MAs, and other domains, and highlight the importance of researching universal multiple access to shrink instead of grow the knowledge tree of MA schemes by providing a unified understanding of MA schemes across all resource dimensions. It then jumps into rethinking MA schemes in the era of wireless network intelligence, covering AI for MA such as AI-empowered resource allocation, optimization, channel estimation, receiver designs, user behavior predictions, and MA for AI such as federated learning/edge intelligence and over the air computation. We then discuss MA for network multi-functionality and the interplay between MA and integrated sensing, localization, and communications. We finish with studying MA for emerging intelligent applications before presenting a roadmap toward 6G standardization. We also point out numerous directions that are promising for future research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.01433v1-abstract-full').style.display = 'none'; document.getElementById('2401.01433v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">submitted for publication in Proceedings of the IEEE</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.17493">arXiv:2312.17493</a> <span> [<a href="https://arxiv.org/pdf/2312.17493">pdf</a>, <a href="https://arxiv.org/format/2312.17493">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Differentially Private Low-Rank Adaptation of Large Language Model Using Federated Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xiao-Yang Liu</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+R">Rongyi Zhu</a>, <a href="/search/cs?searchtype=author&query=Zha%2C+D">Daochen Zha</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+J">Jiechao Gao</a>, <a href="/search/cs?searchtype=author&query=Zhong%2C+S">Shan Zhong</a>, <a href="/search/cs?searchtype=author&query=White%2C+M">Matt White</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Meikang Qiu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.17493v2-abstract-short" style="display: inline;"> The surge in interest and application of large language models (LLMs) has sparked a drive to fine-tune these models to suit specific applications, such as finance and medical science. However, concerns regarding data privacy have emerged, especially when multiple stakeholders aim to collaboratively enhance LLMs using sensitive data. In this scenario, federated learning becomes a natural choice, al… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.17493v2-abstract-full').style.display = 'inline'; document.getElementById('2312.17493v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.17493v2-abstract-full" style="display: none;"> The surge in interest and application of large language models (LLMs) has sparked a drive to fine-tune these models to suit specific applications, such as finance and medical science. However, concerns regarding data privacy have emerged, especially when multiple stakeholders aim to collaboratively enhance LLMs using sensitive data. In this scenario, federated learning becomes a natural choice, allowing decentralized fine-tuning without exposing raw data to central servers. Motivated by this, we investigate how data privacy can be ensured in LLM fine-tuning through practical federated learning approaches, enabling secure contributions from multiple parties to enhance LLMs. Yet, challenges arise: 1) despite avoiding raw data exposure, there is a risk of inferring sensitive information from model outputs, and 2) federated learning for LLMs incurs notable communication overhead. To address these challenges, this article introduces DP-LoRA, a novel federated learning algorithm tailored for LLMs. DP-LoRA preserves data privacy by employing a Gaussian mechanism that adds noise in weight updates, maintaining individual data privacy while facilitating collaborative model training. Moreover, DP-LoRA optimizes communication efficiency via low-rank adaptation, minimizing the transmission of updated weights during distributed training. The experimental results across medical, financial, and general datasets using various LLMs demonstrate that DP-LoRA effectively ensures strict privacy constraints while minimizing communication overhead. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.17493v2-abstract-full').style.display = 'none'; document.getElementById('2312.17493v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">21 pages, 1 figure, 19 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.06761">arXiv:2311.06761</a> <span> [<a href="https://arxiv.org/pdf/2311.06761">pdf</a>, <a href="https://arxiv.org/format/2311.06761">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Learning Knowledge-Enhanced Contextual Language Representations for Domain Natural Language Understanding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+R">Ruyao Xu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+T">Taolin Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Chengyu Wang</a>, <a href="/search/cs?searchtype=author&query=Duan%2C+Z">Zhongjie Duan</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+C">Cen Chen</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Minghui Qiu</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+D">Dawei Cheng</a>, <a href="/search/cs?searchtype=author&query=He%2C+X">Xiaofeng He</a>, <a href="/search/cs?searchtype=author&query=Qian%2C+W">Weining Qian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.06761v1-abstract-short" style="display: inline;"> Knowledge-Enhanced Pre-trained Language Models (KEPLMs) improve the performance of various downstream NLP tasks by injecting knowledge facts from large-scale Knowledge Graphs (KGs). However, existing methods for pre-training KEPLMs with relational triples are difficult to be adapted to close domains due to the lack of sufficient domain graph semantics. In this paper, we propose a Knowledge-enhance… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.06761v1-abstract-full').style.display = 'inline'; document.getElementById('2311.06761v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.06761v1-abstract-full" style="display: none;"> Knowledge-Enhanced Pre-trained Language Models (KEPLMs) improve the performance of various downstream NLP tasks by injecting knowledge facts from large-scale Knowledge Graphs (KGs). However, existing methods for pre-training KEPLMs with relational triples are difficult to be adapted to close domains due to the lack of sufficient domain graph semantics. In this paper, we propose a Knowledge-enhanced lANGuAge Representation learning framework for various clOsed dOmains (KANGAROO) via capturing the implicit graph structure among the entities. Specifically, since the entity coverage rates of closed-domain KGs can be relatively low and may exhibit the global sparsity phenomenon for knowledge injection, we consider not only the shallow relational representations of triples but also the hyperbolic embeddings of deep hierarchical entity-class structures for effective knowledge fusion.Moreover, as two closed-domain entities under the same entity-class often have locally dense neighbor subgraphs counted by max point biconnected component, we further propose a data augmentation strategy based on contrastive learning over subgraphs to construct hard negative samples of higher quality. It makes the underlying KELPMs better distinguish the semantics of these neighboring entities to further complement the global semantic sparsity. In the experiments, we evaluate KANGAROO over various knowledge-aware and general NLP tasks in both full and few-shot learning settings, outperforming various KEPLM training paradigms performance in closed-domains significantly. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.06761v1-abstract-full').style.display = 'none'; document.getElementById('2311.06761v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">emnlp 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.08420">arXiv:2310.08420</a> <span> [<a href="https://arxiv.org/pdf/2310.08420">pdf</a>, <a href="https://arxiv.org/format/2310.08420">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.24963/ijcai.2024/610">10.24963/ijcai.2024/610 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Visual Attention Prompted Prediction and Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yifei Zhang</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+S">Siyi Gu</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+B">Bo Pan</a>, <a href="/search/cs?searchtype=author&query=Bai%2C+G">Guangji Bai</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Meikang Qiu</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+X">Xiaofeng Yang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+L">Liang Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.08420v3-abstract-short" style="display: inline;"> Visual explanation (attention)-guided learning uses not only labels but also explanations to guide model reasoning process. While visual attention-guided learning has shown promising results, it requires a large number of explanation annotations that are time-consuming to prepare. However, in many real-world situations, it is usually desired to prompt the model with visual attention without model… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.08420v3-abstract-full').style.display = 'inline'; document.getElementById('2310.08420v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.08420v3-abstract-full" style="display: none;"> Visual explanation (attention)-guided learning uses not only labels but also explanations to guide model reasoning process. While visual attention-guided learning has shown promising results, it requires a large number of explanation annotations that are time-consuming to prepare. However, in many real-world situations, it is usually desired to prompt the model with visual attention without model retraining. For example, when doing AI-assisted cancer classification on a medical image, users (e.g., clinicians) can provide the AI model with visual attention prompt on which areas are indispensable and which are precluded. Despite its promising objectives, achieving visual attention-prompted prediction presents several major challenges: 1) How can the visual prompt be effectively integrated into the model's reasoning process? 2) How should the model handle samples that lack visual prompts? 3) What is the impact on the model's performance when a visual prompt is imperfect? This paper introduces a novel framework for attention-prompted prediction and learning, utilizing visual prompts to steer the model's reasoning process. To improve performance in non-prompted situations and align it with prompted scenarios, we propose a co-training approach for both non-prompted and prompted models, ensuring they share similar parameters and activations. Additionally, for instances where the visual prompt does not encompass the entire input image, we have developed innovative attention prompt refinement methods. These methods interpolate the incomplete prompts while maintaining alignment with the model's explanations. Extensive experiments on four datasets demonstrate the effectiveness of our proposed framework in enhancing predictions for samples both with and without prompt. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.08420v3-abstract-full').style.display = 'none'; document.getElementById('2310.08420v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.15840">arXiv:2308.15840</a> <span> [<a href="https://arxiv.org/pdf/2308.15840">pdf</a>, <a href="https://arxiv.org/format/2308.15840">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Physics and Society">physics.soc-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Populations and Evolution">q-bio.PE</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1007/s10618-024-01035-w">10.1007/s10618-024-01035-w <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> MSGNN: Multi-scale Spatio-temporal Graph Neural Network for Epidemic Forecasting </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Mingjie Qiu</a>, <a href="/search/cs?searchtype=author&query=Tan%2C+Z">Zhiyi Tan</a>, <a href="/search/cs?searchtype=author&query=Bao%2C+B">Bing-kun Bao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.15840v1-abstract-short" style="display: inline;"> Infectious disease forecasting has been a key focus and proved to be crucial in controlling epidemic. A recent trend is to develop forecast-ing models based on graph neural networks (GNNs). However, existing GNN-based methods suffer from two key limitations: (1) Current models broaden receptive fields by scaling the depth of GNNs, which is insuffi-cient to preserve the semantics of long-range conn… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.15840v1-abstract-full').style.display = 'inline'; document.getElementById('2308.15840v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.15840v1-abstract-full" style="display: none;"> Infectious disease forecasting has been a key focus and proved to be crucial in controlling epidemic. A recent trend is to develop forecast-ing models based on graph neural networks (GNNs). However, existing GNN-based methods suffer from two key limitations: (1) Current models broaden receptive fields by scaling the depth of GNNs, which is insuffi-cient to preserve the semantics of long-range connectivity between distant but epidemic related areas. (2) Previous approaches model epidemics within single spatial scale, while ignoring the multi-scale epidemic pat-terns derived from different scales. To address these deficiencies, we devise the Multi-scale Spatio-temporal Graph Neural Network (MSGNN) based on an innovative multi-scale view. To be specific, in the proposed MSGNN model, we first devise a novel graph learning module, which directly captures long-range connectivity from trans-regional epidemic signals and integrates them into a multi-scale graph. Based on the learned multi-scale graph, we utilize a newly designed graph convolution module to exploit multi-scale epidemic patterns. This module allows us to facilitate multi-scale epidemic modeling by mining both scale-shared and scale-specific pat-terns. Experimental results on forecasting new cases of COVID-19 in United State demonstrate the superiority of our method over state-of-arts. Further analyses and visualization also show that MSGNN offers not only accurate, but also robust and interpretable forecasting result. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.15840v1-abstract-full').style.display = 'none'; document.getElementById('2308.15840v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">29 pages</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Report number:</span> DAMI-D-23-00319R2 </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Data Min Knowl Disc (2024) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.13229">arXiv:2308.13229</a> <span> [<a href="https://arxiv.org/pdf/2308.13229">pdf</a>, <a href="https://arxiv.org/format/2308.13229">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> ReST: A Reconfigurable Spatial-Temporal Graph Model for Multi-Camera Multi-Object Tracking </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cheng%2C+C">Cheng-Che Cheng</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Min-Xuan Qiu</a>, <a href="/search/cs?searchtype=author&query=Chiang%2C+C">Chen-Kuo Chiang</a>, <a href="/search/cs?searchtype=author&query=Lai%2C+S">Shang-Hong Lai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.13229v1-abstract-short" style="display: inline;"> Multi-Camera Multi-Object Tracking (MC-MOT) utilizes information from multiple views to better handle problems with occlusion and crowded scenes. Recently, the use of graph-based approaches to solve tracking problems has become very popular. However, many current graph-based methods do not effectively utilize information regarding spatial and temporal consistency. Instead, they rely on single-came… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.13229v1-abstract-full').style.display = 'inline'; document.getElementById('2308.13229v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.13229v1-abstract-full" style="display: none;"> Multi-Camera Multi-Object Tracking (MC-MOT) utilizes information from multiple views to better handle problems with occlusion and crowded scenes. Recently, the use of graph-based approaches to solve tracking problems has become very popular. However, many current graph-based methods do not effectively utilize information regarding spatial and temporal consistency. Instead, they rely on single-camera trackers as input, which are prone to fragmentation and ID switch errors. In this paper, we propose a novel reconfigurable graph model that first associates all detected objects across cameras spatially before reconfiguring it into a temporal graph for Temporal Association. This two-stage association approach enables us to extract robust spatial and temporal-aware features and address the problem with fragmented tracklets. Furthermore, our model is designed for online tracking, making it suitable for real-world applications. Experimental results show that the proposed graph model is able to extract more discriminating features for object tracking, and our model achieves state-of-the-art performance on several public datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.13229v1-abstract-full').style.display = 'none'; document.getElementById('2308.13229v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ICCV2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.09012">arXiv:2308.09012</a> <span> [<a href="https://arxiv.org/pdf/2308.09012">pdf</a>, <a href="https://arxiv.org/format/2308.09012">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3627673.3679926">10.1145/3627673.3679926 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> FashionLOGO: Prompting Multimodal Large Language Models for Fashion Logo Embeddings </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zhen Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+D">Da Li</a>, <a href="/search/cs?searchtype=author&query=Su%2C+Y">Yulin Su</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+M">Min Yang</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Minghui Qiu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+W">Walton Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.09012v2-abstract-short" style="display: inline;"> Logo embedding models convert the product logos in images into vectors, enabling their utilization for logo recognition and detection within e-commerce platforms. This facilitates the enforcement of intellectual property rights and enhances product search capabilities. However, current methods treat logo embedding as a purely visual problem. A noteworthy issue is that visual models capture feature… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.09012v2-abstract-full').style.display = 'inline'; document.getElementById('2308.09012v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.09012v2-abstract-full" style="display: none;"> Logo embedding models convert the product logos in images into vectors, enabling their utilization for logo recognition and detection within e-commerce platforms. This facilitates the enforcement of intellectual property rights and enhances product search capabilities. However, current methods treat logo embedding as a purely visual problem. A noteworthy issue is that visual models capture features more than logos. Instead, we view this as a multimodal task, using text as auxiliary information to facilitate the visual model's understanding of the logo. The emerging Multimodal Large Language Models (MLLMs) have demonstrated remarkable capabilities in both visual and textual understanding. Inspired by this, we propose an approach, \textbf{FashionLOGO}, to explore how to prompt MLLMs to generate appropriate text for product images, which can help visual models achieve better logo embeddings. We adopt a cross-attention transformer block that enables visual embedding to automatically learn supplementary knowledge from textual embedding. Our extensive experiments on real-world datasets prove that FashionLOGO is capable of generating generic and robust logo embeddings, achieving state-of-the-art performance in all benchmarks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.09012v2-abstract-full').style.display = 'none'; document.getElementById('2308.09012v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.08883">arXiv:2308.08883</a> <span> [<a href="https://arxiv.org/pdf/2308.08883">pdf</a>, <a href="https://arxiv.org/format/2308.08883">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Coexistence of Heterogeneous Services in the Uplink with Discrete Signaling and Treating Interference as Noise </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Min Qiu</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yu-Chih Huang</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+J">Jinhong Yuan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.08883v1-abstract-short" style="display: inline;"> The problem of enabling the coexistence of heterogeneous services, e.g., different ultra-reliable low-latency communications (URLLC) services and/or enhanced mobile broadband (eMBB) services, in the uplink is studied. Each service has its own error probability and blocklength constraints and the longer transmission block suffers from heterogeneous interference. Due to the latency concern, the deco… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.08883v1-abstract-full').style.display = 'inline'; document.getElementById('2308.08883v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.08883v1-abstract-full" style="display: none;"> The problem of enabling the coexistence of heterogeneous services, e.g., different ultra-reliable low-latency communications (URLLC) services and/or enhanced mobile broadband (eMBB) services, in the uplink is studied. Each service has its own error probability and blocklength constraints and the longer transmission block suffers from heterogeneous interference. Due to the latency concern, the decoding of URLLC messages cannot leverage successive interference cancellation (SIC) and should always be performed before the decoding of eMBB messages. This can significantly degrade the achievable rates of URLLC users when the interference from other users is strong. To overcome this issue, we propose a new transmission scheme based on discrete signaling and treating interference as noise decoding, i.e., without SIC. Guided by the deterministic model, we provide a systematic way to construct discrete signaling for handling heterogeneous interference effectively. We demonstrate theoretically and numerically that the proposed scheme can perform close to the benchmark scheme based on capacity-achieving Gaussian signaling with the assumption of perfect SIC. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.08883v1-abstract-full').style.display = 'none'; document.getElementById('2308.08883v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages, accepted for presentation at IEEE Global Communications Conference (GLOBECOM) 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.04278">arXiv:2308.04278</a> <span> [<a href="https://arxiv.org/pdf/2308.04278">pdf</a>, <a href="https://arxiv.org/format/2308.04278">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Achieving Covert Communication With A Probabilistic Jamming Strategy </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xun Chen</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+F">Fujun Gao</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Min Qiu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jia Zhang</a>, <a href="/search/cs?searchtype=author&query=Shu%2C+F">Feng Shu</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+S">Shihao Yan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.04278v2-abstract-short" style="display: inline;"> In this work, we consider a covert communication scenario, where a transmitter Alice communicates to a receiver Bob with the aid of a probabilistic and uninformed jammer against an adversary warden's detection. The transmission status and power of the jammer are random and follow some priori probabilities. We first analyze the warden's detection performance as a function of the jammer's transmissi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.04278v2-abstract-full').style.display = 'inline'; document.getElementById('2308.04278v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.04278v2-abstract-full" style="display: none;"> In this work, we consider a covert communication scenario, where a transmitter Alice communicates to a receiver Bob with the aid of a probabilistic and uninformed jammer against an adversary warden's detection. The transmission status and power of the jammer are random and follow some priori probabilities. We first analyze the warden's detection performance as a function of the jammer's transmission probability, transmit power distribution, and Alice's transmit power. We then maximize the covert throughput from Alice to Bob subject to a covertness constraint, by designing the covert communication strategies from three different perspectives: Alice's perspective, the jammer's perspective, and the global perspective. Our analysis reveals that the minimum jamming power should not always be zero in the probabilistic jamming strategy, which is different from that in the continuous jamming strategy presented in the literature. In addition, we prove that the minimum jamming power should be the same as Alice's covert transmit power, depending on the covertness and average jamming power constraints. Furthermore, our results show that the probabilistic jamming can outperform the continuous jamming in terms of achieving a higher covert throughput under the same covertness and average jamming power constraints. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.04278v2-abstract-full').style.display = 'none'; document.getElementById('2308.04278v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.02457">arXiv:2308.02457</a> <span> [<a href="https://arxiv.org/pdf/2308.02457">pdf</a>, <a href="https://arxiv.org/format/2308.02457">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> A Survey on Temporal Knowledge Graph Completion: Taxonomy, Progress, and Prospects </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jiapu Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+B">Boyue Wang</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Meikang Qiu</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+S">Shirui Pan</a>, <a href="/search/cs?searchtype=author&query=Xiong%2C+B">Bo Xiong</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+H">Heng Liu</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+L">Linhao Luo</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+T">Tengfei Liu</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+Y">Yongli Hu</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+B">Baocai Yin</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+W">Wen Gao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.02457v1-abstract-short" style="display: inline;"> Temporal characteristics are prominently evident in a substantial volume of knowledge, which underscores the pivotal role of Temporal Knowledge Graphs (TKGs) in both academia and industry. However, TKGs often suffer from incompleteness for three main reasons: the continuous emergence of new knowledge, the weakness of the algorithm for extracting structured information from unstructured data, and t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.02457v1-abstract-full').style.display = 'inline'; document.getElementById('2308.02457v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.02457v1-abstract-full" style="display: none;"> Temporal characteristics are prominently evident in a substantial volume of knowledge, which underscores the pivotal role of Temporal Knowledge Graphs (TKGs) in both academia and industry. However, TKGs often suffer from incompleteness for three main reasons: the continuous emergence of new knowledge, the weakness of the algorithm for extracting structured information from unstructured data, and the lack of information in the source dataset. Thus, the task of Temporal Knowledge Graph Completion (TKGC) has attracted increasing attention, aiming to predict missing items based on the available information. In this paper, we provide a comprehensive review of TKGC methods and their details. Specifically, this paper mainly consists of three components, namely, 1)Background, which covers the preliminaries of TKGC methods, loss functions required for training, as well as the dataset and evaluation protocol; 2)Interpolation, that estimates and predicts the missing elements or set of elements through the relevant available information. It further categorizes related TKGC methods based on how to process temporal information; 3)Extrapolation, which typically focuses on continuous TKGs and predicts future events, and then classifies all extrapolation methods based on the algorithms they utilize. We further pinpoint the challenges and discuss future research directions of TKGC. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.02457v1-abstract-full').style.display = 'none'; document.getElementById('2308.02457v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.04525">arXiv:2307.04525</a> <span> [<a href="https://arxiv.org/pdf/2307.04525">pdf</a>, <a href="https://arxiv.org/format/2307.04525">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Cluster-Induced Mask Transformers for Effective Opportunistic Gastric Cancer Screening on Non-contrast CT Scans </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yuan%2C+M">Mingze Yuan</a>, <a href="/search/cs?searchtype=author&query=Xia%2C+Y">Yingda Xia</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xin Chen</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+J">Jiawen Yao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Junli Wang</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Mingyan Qiu</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+H">Hexin Dong</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+J">Jingren Zhou</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+B">Bin Dong</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+L">Le Lu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Li Zhang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zaiyi Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Ling Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.04525v2-abstract-short" style="display: inline;"> Gastric cancer is the third leading cause of cancer-related mortality worldwide, but no guideline-recommended screening test exists. Existing methods can be invasive, expensive, and lack sensitivity to identify early-stage gastric cancer. In this study, we explore the feasibility of using a deep learning approach on non-contrast CT scans for gastric cancer detection. We propose a novel cluster-ind… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.04525v2-abstract-full').style.display = 'inline'; document.getElementById('2307.04525v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.04525v2-abstract-full" style="display: none;"> Gastric cancer is the third leading cause of cancer-related mortality worldwide, but no guideline-recommended screening test exists. Existing methods can be invasive, expensive, and lack sensitivity to identify early-stage gastric cancer. In this study, we explore the feasibility of using a deep learning approach on non-contrast CT scans for gastric cancer detection. We propose a novel cluster-induced Mask Transformer that jointly segments the tumor and classifies abnormality in a multi-task manner. Our model incorporates learnable clusters that encode the texture and shape prototypes of gastric cancer, utilizing self- and cross-attention to interact with convolutional features. In our experiments, the proposed method achieves a sensitivity of 85.0% and specificity of 92.6% for detecting gastric tumors on a hold-out test set consisting of 100 patients with cancer and 148 normal. In comparison, two radiologists have an average sensitivity of 73.5% and specificity of 84.3%. We also obtain a specificity of 97.7% on an external test set with 903 normal cases. Our approach performs comparably to established state-of-the-art gastric cancer screening tools like blood testing and endoscopy, while also being more sensitive in detecting early-stage cancer. This demonstrates the potential of our approach as a novel, non-invasive, low-cost, and accurate method for opportunistic gastric cancer screening. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.04525v2-abstract-full').style.display = 'none'; document.getElementById('2307.04525v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">MICCAI 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.17451">arXiv:2306.17451</a> <span> [<a href="https://arxiv.org/pdf/2306.17451">pdf</a>, <a href="https://arxiv.org/ps/2306.17451">ps</a>, <a href="https://arxiv.org/format/2306.17451">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Self-Connected Spatially Coupled LDPC Codes with Improved Termination </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liao%2C+Y">Yihuan Liao</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Min Qiu</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+J">Jinhong Yuan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.17451v1-abstract-short" style="display: inline;"> This paper investigates the design of self-connected spatially coupled low-density parity-check (SC-LDPC) codes. First, a termination method is proposed to reduce rate loss. Particularly, a single-side open SC-LDPC ensemble is introduced, which halves the rate loss of a conventional terminated SC-LDPC by reducing the number of check nodes. We further propose a self-connection method that allows re… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.17451v1-abstract-full').style.display = 'inline'; document.getElementById('2306.17451v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.17451v1-abstract-full" style="display: none;"> This paper investigates the design of self-connected spatially coupled low-density parity-check (SC-LDPC) codes. First, a termination method is proposed to reduce rate loss. Particularly, a single-side open SC-LDPC ensemble is introduced, which halves the rate loss of a conventional terminated SC-LDPC by reducing the number of check nodes. We further propose a self-connection method that allows reliable information to propagate from several directions to improve the decoding threshold. We demonstrate that the proposed ensembles not only achieve a better trade-off between rate loss and gap to capacity than several existing protograph SC-LDPC codes with short chain lengths but also exhibit threshold saturation behavior. Finite blocklength error performance is provided to exemplify the superiority of the proposed codes over conventional protograph SC-LDPC codes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.17451v1-abstract-full').style.display = 'none'; document.getElementById('2306.17451v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 8 figures, accepted for publication in IEEE Communications Letters</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.07207">arXiv:2306.07207</a> <span> [<a href="https://arxiv.org/pdf/2306.07207">pdf</a>, <a href="https://arxiv.org/format/2306.07207">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Valley: Video Assistant with Large Language model Enhanced abilitY </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Luo%2C+R">Ruipu Luo</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Z">Ziwang Zhao</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+M">Min Yang</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+J">Junwei Dong</a>, <a href="/search/cs?searchtype=author&query=Li%2C+D">Da Li</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+P">Pengcheng Lu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tao Wang</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+L">Linmei Hu</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Minghui Qiu</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+Z">Zhongyu Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.07207v2-abstract-short" style="display: inline;"> Large language models (LLMs), with their remarkable conversational capabilities, have demonstrated impressive performance across various applications and have emerged as formidable AI assistants. In view of this, it raises an intuitive question: Can we harness the power of LLMs to build multimodal AI assistants for visual applications? Recently, several multi-modal models have been developed for t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.07207v2-abstract-full').style.display = 'inline'; document.getElementById('2306.07207v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.07207v2-abstract-full" style="display: none;"> Large language models (LLMs), with their remarkable conversational capabilities, have demonstrated impressive performance across various applications and have emerged as formidable AI assistants. In view of this, it raises an intuitive question: Can we harness the power of LLMs to build multimodal AI assistants for visual applications? Recently, several multi-modal models have been developed for this purpose. They typically pre-train an adaptation module to align the semantics of the vision encoder and language model, followed by fine-tuning on instruction-following data. However, despite the success of this pipeline in image and language understanding, its effectiveness in joint video and language understanding has not been widely explored. In this paper, we aim to develop a novel multi-modal foundation model capable of comprehending video, image, and language within a general framework. To achieve this goal, we introduce Valley, a Video Assistant with Large Language model Enhanced abilitY. The Valley consists of a LLM, a temporal modeling module, a visual encoder, and a simple projection module designed to bridge visual and textual modes. To empower Valley with video comprehension and instruction-following capabilities, we construct a video instruction dataset and adopt a two-stage tuning procedure to train it. Specifically, we employ ChatGPT to facilitate the construction of task-oriented conversation data encompassing various tasks, including multi-shot captions, long video descriptions, action recognition, causal relationship inference, etc. Subsequently, we adopt a pre-training-then-instructions-tuned pipeline to align visual and textual modalities and improve the instruction-following capability of Valley. Qualitative experiments demonstrate that Valley has the potential to function as a highly effective video assistant that can make complex video understanding scenarios easy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.07207v2-abstract-full').style.display = 'none'; document.getElementById('2306.07207v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.02200">arXiv:2305.02200</a> <span> [<a href="https://arxiv.org/pdf/2305.02200">pdf</a>, <a href="https://arxiv.org/format/2305.02200">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Deep Graph Representation Learning and Optimization for Influence Maximization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ling%2C+C">Chen Ling</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+J">Junji Jiang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Junxiang Wang</a>, <a href="/search/cs?searchtype=author&query=Thai%2C+M">My Thai</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+L">Lukas Xue</a>, <a href="/search/cs?searchtype=author&query=Song%2C+J">James Song</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Meikang Qiu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+L">Liang Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.02200v2-abstract-short" style="display: inline;"> Influence maximization (IM) is formulated as selecting a set of initial users from a social network to maximize the expected number of influenced users. Researchers have made great progress in designing various traditional methods, and their theoretical design and performance gain are close to a limit. In the past few years, learning-based IM methods have emerged to achieve stronger generalization… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.02200v2-abstract-full').style.display = 'inline'; document.getElementById('2305.02200v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.02200v2-abstract-full" style="display: none;"> Influence maximization (IM) is formulated as selecting a set of initial users from a social network to maximize the expected number of influenced users. Researchers have made great progress in designing various traditional methods, and their theoretical design and performance gain are close to a limit. In the past few years, learning-based IM methods have emerged to achieve stronger generalization ability to unknown graphs than traditional ones. However, the development of learning-based IM methods is still limited by fundamental obstacles, including 1) the difficulty of effectively solving the objective function; 2) the difficulty of characterizing the diversified underlying diffusion patterns; and 3) the difficulty of adapting the solution under various node-centrality-constrained IM variants. To cope with the above challenges, we design a novel framework DeepIM to generatively characterize the latent representation of seed sets, and we propose to learn the diversified information diffusion pattern in a data-driven and end-to-end manner. Finally, we design a novel objective function to infer optimal seed sets under flexible node-centrality-based budget constraints. Extensive analyses are conducted over both synthetic and real-world datasets to demonstrate the overall performance of DeepIM. The code and data are available at: https://github.com/triplej0079/DeepIM. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.02200v2-abstract-full').style.display = 'none'; document.getElementById('2305.02200v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">In Proceedings of the 40th International Conference on Machine Learning (ICML 2023), Honolulu, Hawaii, USA. PMLR 202, 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2304.00212">arXiv:2304.00212</a> <span> [<a href="https://arxiv.org/pdf/2304.00212">pdf</a>, <a href="https://arxiv.org/format/2304.00212">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Devil is in the Queries: Advancing Mask Transformers for Real-world Medical Image Segmentation and Out-of-Distribution Localization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yuan%2C+M">Mingze Yuan</a>, <a href="/search/cs?searchtype=author&query=Xia%2C+Y">Yingda Xia</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+H">Hexin Dong</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zifan Chen</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+J">Jiawen Yao</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Mingyan Qiu</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+K">Ke Yan</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+X">Xiaoli Yin</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+Y">Yu Shi</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xin Chen</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zaiyi Liu</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+B">Bin Dong</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+J">Jingren Zhou</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+L">Le Lu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Ling Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Li Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2304.00212v1-abstract-short" style="display: inline;"> Real-world medical image segmentation has tremendous long-tailed complexity of objects, among which tail conditions correlate with relatively rare diseases and are clinically significant. A trustworthy medical AI algorithm should demonstrate its effectiveness on tail conditions to avoid clinically dangerous damage in these out-of-distribution (OOD) cases. In this paper, we adopt the concept of obj… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.00212v1-abstract-full').style.display = 'inline'; document.getElementById('2304.00212v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2304.00212v1-abstract-full" style="display: none;"> Real-world medical image segmentation has tremendous long-tailed complexity of objects, among which tail conditions correlate with relatively rare diseases and are clinically significant. A trustworthy medical AI algorithm should demonstrate its effectiveness on tail conditions to avoid clinically dangerous damage in these out-of-distribution (OOD) cases. In this paper, we adopt the concept of object queries in Mask Transformers to formulate semantic segmentation as a soft cluster assignment. The queries fit the feature-level cluster centers of inliers during training. Therefore, when performing inference on a medical image in real-world scenarios, the similarity between pixels and the queries detects and localizes OOD regions. We term this OOD localization as MaxQuery. Furthermore, the foregrounds of real-world medical images, whether OOD objects or inliers, are lesions. The difference between them is less than that between the foreground and background, possibly misleading the object queries to focus redundantly on the background. Thus, we propose a query-distribution (QD) loss to enforce clear boundaries between segmentation targets and other regions at the query level, improving the inlier segmentation and OOD indication. Our proposed framework is tested on two real-world segmentation tasks, i.e., segmentation of pancreatic and liver tumors, outperforming previous state-of-the-art algorithms by an average of 7.39% on AUROC, 14.69% on AUPR, and 13.79% on FPR95 for OOD localization. On the other hand, our framework improves the performance of inlier segmentation by an average of 5.27% DSC when compared with the leading baseline nnUNet. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.00212v1-abstract-full').style.display = 'none'; document.getElementById('2304.00212v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">CVPR 2023 Highlight</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.08018">arXiv:2302.08018</a> <span> [<a href="https://arxiv.org/pdf/2302.08018">pdf</a>, <a href="https://arxiv.org/format/2302.08018">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Towards Fair Machine Learning Software: Understanding and Addressing Model Bias Through Counterfactual Thinking </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zichong Wang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Y">Yang Zhou</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Meikang Qiu</a>, <a href="/search/cs?searchtype=author&query=Haque%2C+I">Israat Haque</a>, <a href="/search/cs?searchtype=author&query=Brown%2C+L">Laura Brown</a>, <a href="/search/cs?searchtype=author&query=He%2C+Y">Yi He</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jianwu Wang</a>, <a href="/search/cs?searchtype=author&query=Lo%2C+D">David Lo</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Wenbin Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2302.08018v1-abstract-short" style="display: inline;"> The increasing use of Machine Learning (ML) software can lead to unfair and unethical decisions, thus fairness bugs in software are becoming a growing concern. Addressing these fairness bugs often involves sacrificing ML performance, such as accuracy. To address this issue, we present a novel counterfactual approach that uses counterfactual thinking to tackle the root causes of bias in ML software… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.08018v1-abstract-full').style.display = 'inline'; document.getElementById('2302.08018v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.08018v1-abstract-full" style="display: none;"> The increasing use of Machine Learning (ML) software can lead to unfair and unethical decisions, thus fairness bugs in software are becoming a growing concern. Addressing these fairness bugs often involves sacrificing ML performance, such as accuracy. To address this issue, we present a novel counterfactual approach that uses counterfactual thinking to tackle the root causes of bias in ML software. In addition, our approach combines models optimized for both performance and fairness, resulting in an optimal solution in both aspects. We conducted a thorough evaluation of our approach on 10 benchmark tasks using a combination of 5 performance metrics, 3 fairness metrics, and 15 measurement scenarios, all applied to 8 real-world datasets. The conducted extensive evaluations show that the proposed method significantly improves the fairness of ML software while maintaining competitive performance, outperforming state-of-the-art solutions in 84.6% of overall cases based on a recent benchmarking tool. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.08018v1-abstract-full').style.display = 'none'; document.getElementById('2302.08018v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.03507">arXiv:2302.03507</a> <span> [<a href="https://arxiv.org/pdf/2302.03507">pdf</a>, <a href="https://arxiv.org/format/2302.03507">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Meta-Learning Siamese Network for Few-Shot Text Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Han%2C+C">Chengcheng Han</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yuhe Wang</a>, <a href="/search/cs?searchtype=author&query=Fu%2C+Y">Yingnan Fu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiang Li</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Minghui Qiu</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+M">Ming Gao</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+A">Aoying Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2302.03507v2-abstract-short" style="display: inline;"> Few-shot learning has been used to tackle the problem of label scarcity in text classification, of which meta-learning based methods have shown to be effective, such as the prototypical networks (PROTO). Despite the success of PROTO, there still exist three main problems: (1) ignore the randomness of the sampled support sets when computing prototype vectors; (2) disregard the importance of labeled… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.03507v2-abstract-full').style.display = 'inline'; document.getElementById('2302.03507v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.03507v2-abstract-full" style="display: none;"> Few-shot learning has been used to tackle the problem of label scarcity in text classification, of which meta-learning based methods have shown to be effective, such as the prototypical networks (PROTO). Despite the success of PROTO, there still exist three main problems: (1) ignore the randomness of the sampled support sets when computing prototype vectors; (2) disregard the importance of labeled samples; (3) construct meta-tasks in a purely random manner. In this paper, we propose a Meta-Learning Siamese Network, namely, Meta-SN, to address these issues. Specifically, instead of computing prototype vectors from the sampled support sets, Meta-SN utilizes external knowledge (e.g. class names and descriptive texts) for class labels, which is encoded as the low-dimensional embeddings of prototype vectors. In addition, Meta-SN presents a novel sampling strategy for constructing meta-tasks, which gives higher sampling probabilities to hard-to-classify samples. Extensive experiments are conducted on six benchmark datasets to show the clear superiority of Meta-SN over other state-of-the-art models. For reproducibility, all the datasets and codes are provided at https://github.com/hccngu/Meta-SN. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.03507v2-abstract-full').style.display = 'none'; document.getElementById('2302.03507v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2301.12291">arXiv:2301.12291</a> <span> [<a href="https://arxiv.org/pdf/2301.12291">pdf</a>, <a href="https://arxiv.org/format/2301.12291">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> CancerUniT: Towards a Single Unified Model for Effective Detection, Segmentation, and Diagnosis of Eight Major Cancers Using a Large Collection of CT Scans </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jieneng Chen</a>, <a href="/search/cs?searchtype=author&query=Xia%2C+Y">Yingda Xia</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+J">Jiawen Yao</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+K">Ke Yan</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jianpeng Zhang</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+L">Le Lu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+F">Fakai Wang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+B">Bo Zhou</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Mingyan Qiu</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+Q">Qihang Yu</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+M">Mingze Yuan</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+W">Wei Fang</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+Y">Yuxing Tang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+M">Minfeng Xu</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+J">Jian Zhou</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Y">Yuqian Zhao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Q">Qifeng Wang</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+X">Xianghua Ye</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+X">Xiaoli Yin</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+Y">Yu Shi</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xin Chen</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+J">Jingren Zhou</a>, <a href="/search/cs?searchtype=author&query=Yuille%2C+A">Alan Yuille</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zaiyi Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Ling Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2301.12291v2-abstract-short" style="display: inline;"> Human readers or radiologists routinely perform full-body multi-organ multi-disease detection and diagnosis in clinical practice, while most medical AI systems are built to focus on single organs with a narrow list of a few diseases. This might severely limit AI's clinical adoption. A certain number of AI models need to be assembled non-trivially to match the diagnostic process of a human reading… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.12291v2-abstract-full').style.display = 'inline'; document.getElementById('2301.12291v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2301.12291v2-abstract-full" style="display: none;"> Human readers or radiologists routinely perform full-body multi-organ multi-disease detection and diagnosis in clinical practice, while most medical AI systems are built to focus on single organs with a narrow list of a few diseases. This might severely limit AI's clinical adoption. A certain number of AI models need to be assembled non-trivially to match the diagnostic process of a human reading a CT scan. In this paper, we construct a Unified Tumor Transformer (CancerUniT) model to jointly detect tumor existence & location and diagnose tumor characteristics for eight major cancers in CT scans. CancerUniT is a query-based Mask Transformer model with the output of multi-tumor prediction. We decouple the object queries into organ queries, tumor detection queries and tumor diagnosis queries, and further establish hierarchical relationships among the three groups. This clinically-inspired architecture effectively assists inter- and intra-organ representation learning of tumors and facilitates the resolution of these complex, anatomically related multi-organ cancer image reading tasks. CancerUniT is trained end-to-end using a curated large-scale CT images of 10,042 patients including eight major types of cancers and occurring non-cancer tumors (all are pathology-confirmed with 3D tumor masks annotated by radiologists). On the test set of 631 patients, CancerUniT has demonstrated strong performance under a set of clinically relevant evaluation metrics, substantially outperforming both multi-disease methods and an assembly of eight single-organ expert models in tumor detection, segmentation, and diagnosis. This moves one step closer towards a universal high performance cancer screening tool. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.12291v2-abstract-full').style.display = 'none'; document.getElementById('2301.12291v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICCV 2023 Camera Ready Version</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2301.09303">arXiv:2301.09303</a> <span> [<a href="https://arxiv.org/pdf/2301.09303">pdf</a>, <a href="https://arxiv.org/format/2301.09303">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Downlink Transmission under Heterogeneous Blocklength Constraints: Discrete Signaling with Single-User Decoding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Min Qiu</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yu-Chih Huang</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+J">Jinhong Yuan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2301.09303v1-abstract-short" style="display: inline;"> In this paper, we consider the downlink broadcast channel under heterogenous blocklength constraints, where each user experiences different interference statistics across its received symbols. Different from the homogeneous blocklength case, the strong users with short blocklength transmitted symbol blocks usually cannot wait to receive the entire transmission frame and perform successive interfer… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.09303v1-abstract-full').style.display = 'inline'; document.getElementById('2301.09303v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2301.09303v1-abstract-full" style="display: none;"> In this paper, we consider the downlink broadcast channel under heterogenous blocklength constraints, where each user experiences different interference statistics across its received symbols. Different from the homogeneous blocklength case, the strong users with short blocklength transmitted symbol blocks usually cannot wait to receive the entire transmission frame and perform successive interference cancellation (SIC) owing to their stringent latency requirements. Even if SIC is feasible, it may not be perfect under finite blocklength constraints. To cope with the heterogeneity in latency and reliability requirements, we propose a practical downlink transmission scheme with discrete signaling and single-user decoding, i.e., without SIC. In addition, we derive the finite blocklength achievable rate and use it for guiding the design of channel coding and modulations. Both achievable rate and error probability simulation show that the proposed scheme can operate close to the benchmark scheme which assumes capacity-achieving signaling and perfect SIC. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.09303v1-abstract-full').style.display = 'none'; document.getElementById('2301.09303v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages, 1 figure, accepted for presentation at IEEE ICC 2023. arXiv admin note: substantial text overlap with arXiv:2212.01736</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2212.10013">arXiv:2212.10013</a> <span> [<a href="https://arxiv.org/pdf/2212.10013">pdf</a>, <a href="https://arxiv.org/format/2212.10013">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> DocAsRef: An Empirical Study on Repurposing Reference-Based Summary Quality Metrics Reference-Freely </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Bao%2C+F+S">Forrest Sheng Bao</a>, <a href="/search/cs?searchtype=author&query=Tu%2C+R">Ruixuan Tu</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+G">Ge Luo</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yinfei Yang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+H">Hebi Li</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Minghui Qiu</a>, <a href="/search/cs?searchtype=author&query=He%2C+Y">Youbiao He</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+C">Cen Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2212.10013v2-abstract-short" style="display: inline;"> Automated summary quality assessment falls into two categories: reference-based and reference-free. Reference-based metrics, historically deemed more accurate due to the additional information provided by human-written references, are limited by their reliance on human input. In this paper, we hypothesize that the comparison methodologies used by some reference-based metrics to evaluate a system s… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.10013v2-abstract-full').style.display = 'inline'; document.getElementById('2212.10013v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2212.10013v2-abstract-full" style="display: none;"> Automated summary quality assessment falls into two categories: reference-based and reference-free. Reference-based metrics, historically deemed more accurate due to the additional information provided by human-written references, are limited by their reliance on human input. In this paper, we hypothesize that the comparison methodologies used by some reference-based metrics to evaluate a system summary against its corresponding reference can be effectively adapted to assess it against its source document, thereby transforming these metrics into reference-free ones. Experimental results support this hypothesis. After being repurposed reference-freely, the zero-shot BERTScore using the pretrained DeBERTa-large-MNLI model of <0.5B parameters consistently outperforms its original reference-based version across various aspects on the SummEval and Newsroom datasets. It also excels in comparison to most existing reference-free metrics and closely competes with zero-shot summary evaluators based on GPT-3.5. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.10013v2-abstract-full').style.display = 'none'; document.getElementById('2212.10013v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted into Findings of EMNLP 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2212.01736">arXiv:2212.01736</a> <span> [<a href="https://arxiv.org/pdf/2212.01736">pdf</a>, <a href="https://arxiv.org/format/2212.01736">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Downlink Transmission with Heterogeneous URLLC Services: Discrete Signaling With Single-User Decoding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Min Qiu</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yu-Chih Huang</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+J">Jinhong Yuan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2212.01736v2-abstract-short" style="display: inline;"> The problem of designing downlink transmission schemes for supporting heterogeneous ultra-reliable low-latency communications (URLLC) and/or with other types of services is investigated. We consider the broadcast channel, where the base station sends superimposed signals to multiple users. Under heterogeneous blocklength constraints, strong users who are URLLC users cannot wait to receive the enti… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.01736v2-abstract-full').style.display = 'inline'; document.getElementById('2212.01736v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2212.01736v2-abstract-full" style="display: none;"> The problem of designing downlink transmission schemes for supporting heterogeneous ultra-reliable low-latency communications (URLLC) and/or with other types of services is investigated. We consider the broadcast channel, where the base station sends superimposed signals to multiple users. Under heterogeneous blocklength constraints, strong users who are URLLC users cannot wait to receive the entire transmission frame and perform successive interference cancellation (SIC) due to stringent latency requirements, in contrast to the conventional infinite blocklength cases. Even if SIC is feasible, SIC may be imperfect under finite blocklength constraints. To cope with the heterogeneity in latency and reliability requirements, we propose a practical downlink transmission scheme with discrete signaling and single-user decoding (SUD), i.e., without SIC. We carefully design the discrete input distributions to enable efficient SUD by exploiting the structural interference. Furthermore, we derive the second-order achievable rate under heterogenous blocklength and error probability constraints and use it to guide the design of channel coding and modulations. It is shown that in terms of achievable rate under short blocklength, the proposed scheme with regular quadrature amplitude modulations and SUD can operate extremely close to the benchmark schemes that assume perfect SIC with Gaussian signaling. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.01736v2-abstract-full').style.display = 'none'; document.getElementById('2212.01736v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, 7 figures, accepted by IEEE Journal on Selected Areas in Communications</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.05100">arXiv:2211.05100</a> <span> [<a href="https://arxiv.org/pdf/2211.05100">pdf</a>, <a href="https://arxiv.org/format/2211.05100">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> BLOOM: A 176B-Parameter Open-Access Multilingual Language Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Workshop%2C+B">BigScience Workshop</a>, <a href="/search/cs?searchtype=author&query=%3A"> :</a>, <a href="/search/cs?searchtype=author&query=Scao%2C+T+L">Teven Le Scao</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+A">Angela Fan</a>, <a href="/search/cs?searchtype=author&query=Akiki%2C+C">Christopher Akiki</a>, <a href="/search/cs?searchtype=author&query=Pavlick%2C+E">Ellie Pavlick</a>, <a href="/search/cs?searchtype=author&query=Ili%C4%87%2C+S">Suzana Ili膰</a>, <a href="/search/cs?searchtype=author&query=Hesslow%2C+D">Daniel Hesslow</a>, <a href="/search/cs?searchtype=author&query=Castagn%C3%A9%2C+R">Roman Castagn茅</a>, <a href="/search/cs?searchtype=author&query=Luccioni%2C+A+S">Alexandra Sasha Luccioni</a>, <a href="/search/cs?searchtype=author&query=Yvon%2C+F">Fran莽ois Yvon</a>, <a href="/search/cs?searchtype=author&query=Gall%C3%A9%2C+M">Matthias Gall茅</a>, <a href="/search/cs?searchtype=author&query=Tow%2C+J">Jonathan Tow</a>, <a href="/search/cs?searchtype=author&query=Rush%2C+A+M">Alexander M. Rush</a>, <a href="/search/cs?searchtype=author&query=Biderman%2C+S">Stella Biderman</a>, <a href="/search/cs?searchtype=author&query=Webson%2C+A">Albert Webson</a>, <a href="/search/cs?searchtype=author&query=Ammanamanchi%2C+P+S">Pawan Sasanka Ammanamanchi</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Thomas Wang</a>, <a href="/search/cs?searchtype=author&query=Sagot%2C+B">Beno卯t Sagot</a>, <a href="/search/cs?searchtype=author&query=Muennighoff%2C+N">Niklas Muennighoff</a>, <a href="/search/cs?searchtype=author&query=del+Moral%2C+A+V">Albert Villanova del Moral</a>, <a href="/search/cs?searchtype=author&query=Ruwase%2C+O">Olatunji Ruwase</a>, <a href="/search/cs?searchtype=author&query=Bawden%2C+R">Rachel Bawden</a>, <a href="/search/cs?searchtype=author&query=Bekman%2C+S">Stas Bekman</a>, <a href="/search/cs?searchtype=author&query=McMillan-Major%2C+A">Angelina McMillan-Major</a> , et al. (369 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.05100v4-abstract-short" style="display: inline;"> Large language models (LLMs) have been shown to be able to perform new tasks based on a few demonstrations or natural language instructions. While these capabilities have led to widespread adoption, most LLMs are developed by resource-rich organizations and are frequently kept from the public. As a step towards democratizing this powerful technology, we present BLOOM, a 176B-parameter open-access… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.05100v4-abstract-full').style.display = 'inline'; document.getElementById('2211.05100v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.05100v4-abstract-full" style="display: none;"> Large language models (LLMs) have been shown to be able to perform new tasks based on a few demonstrations or natural language instructions. While these capabilities have led to widespread adoption, most LLMs are developed by resource-rich organizations and are frequently kept from the public. As a step towards democratizing this powerful technology, we present BLOOM, a 176B-parameter open-access language model designed and built thanks to a collaboration of hundreds of researchers. BLOOM is a decoder-only Transformer language model that was trained on the ROOTS corpus, a dataset comprising hundreds of sources in 46 natural and 13 programming languages (59 in total). We find that BLOOM achieves competitive performance on a wide variety of benchmarks, with stronger results after undergoing multitask prompted finetuning. To facilitate future research and applications using LLMs, we publicly release our models and code under the Responsible AI License. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.05100v4-abstract-full').style.display = 'none'; document.getElementById('2211.05100v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.11674">arXiv:2210.11674</a> <span> [<a href="https://arxiv.org/pdf/2210.11674">pdf</a>, <a href="https://arxiv.org/format/2210.11674">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> WristSketcher: Creating Dynamic Sketches in AR with a Sensing Wristband </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ying%2C+E">Enting Ying</a>, <a href="/search/cs?searchtype=author&query=Xiong%2C+T">Tianyang Xiong</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+S">Shihui Guo</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Ming Qiu</a>, <a href="/search/cs?searchtype=author&query=Qin%2C+Y">Yipeng Qin</a>, <a href="/search/cs?searchtype=author&query=Fu%2C+H">Hongbo Fu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.11674v2-abstract-short" style="display: inline;"> Restricted by the limited interaction area of native AR glasses (e.g., touch bars), it is challenging to create sketches in AR glasses. Recent works have attempted to use mobile devices (e.g., tablets) or mid-air bare-hand gestures to expand the interactive spaces and can work as the 2D/3D sketching input interfaces for AR glasses. Between them, mobile devices allow for accurate sketching but are… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.11674v2-abstract-full').style.display = 'inline'; document.getElementById('2210.11674v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.11674v2-abstract-full" style="display: none;"> Restricted by the limited interaction area of native AR glasses (e.g., touch bars), it is challenging to create sketches in AR glasses. Recent works have attempted to use mobile devices (e.g., tablets) or mid-air bare-hand gestures to expand the interactive spaces and can work as the 2D/3D sketching input interfaces for AR glasses. Between them, mobile devices allow for accurate sketching but are often heavy to carry, while sketching with bare hands is zero-burden but can be inaccurate due to arm instability. In addition, mid-air bare-hand sketching can easily lead to social misunderstandings and its prolonged use can cause arm fatigue. As a new attempt, in this work, we present WristSketcher, a new AR system based on a flexible sensing wristband for creating 2D dynamic sketches, featuring an almost zero-burden authoring model for accurate and comfortable sketch creation in real-world scenarios. Specifically, we have streamlined the interaction space from the mid-air to the surface of a lightweight sensing wristband, and implemented AR sketching and associated interaction commands by developing a gesture recognition method based on the sensing pressure points on the wristband. The set of interactive gestures used by our WristSketcher is determined by a heuristic study on user preferences. Moreover, we endow our WristSketcher with the ability of animation creation, allowing it to create dynamic and expressive sketches. Experimental results demonstrate that our WristSketcher i) faithfully recognizes users' gesture interactions with a high accuracy of 96.0%; ii) achieves higher sketching accuracy than Freehand sketching; iii) achieves high user satisfaction in ease of use, usability and functionality; and iv) shows innovation potentials in art creation, memory aids, and entertainment applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.11674v2-abstract-full').style.display = 'none'; document.getElementById('2210.11674v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Qiu%2C+M&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Qiu%2C+M&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Qiu%2C+M&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Qiu%2C+M&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> </ul> </nav> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository