Search | arXiv e-print repository
Showing 1–50 of 319 results for author: Wei, L
Searching in archive cs; sorted by announcement date (newest first); 50 results per page (pages 1–7).

1. arXiv:2411.05879 (https://arxiv.org/abs/2411.05879) [cs.CV]
   Title: Smile upon the Face but Sadness in the Eyes: Emotion Recognition based on Facial Expressions and Eye Behaviors
   Authors: Yuanyuan Liu, Lin Wei, Kejun Liu, Yibing Zhan, Zijing Chen, Zhe Chen, Shiguang Shan
   Abstract: Emotion Recognition (ER) is the process of identifying human emotions from given data. Currently, the field relies heavily on facial expression recognition (FER) because facial expressions contain rich emotional cues. However, facial expressions may not always reflect genuine emotions precisely, so FER-based results can yield misleading ER. To understand and bridge this gap between FER and ER, we introduce eye behaviors as important emotional cues and create a new Eye-behavior-aided Multimodal Emotion Recognition (EMER) dataset. Unlike existing multimodal ER datasets, the EMER dataset employs a stimulus-material-induced spontaneous emotion generation method to integrate non-invasive eye behavior data, such as eye movements and eye fixation maps, with facial videos, aiming to capture natural and accurate human emotions. Notably, for the first time, we provide annotations for both ER and FER in EMER, enabling a comprehensive analysis of the gap between the two tasks. Furthermore, we design a new EMERT architecture that concurrently enhances performance on both ER and FER by efficiently identifying and bridging the emotion gap between the two. Specifically, EMERT employs modality-adversarial feature decoupling and a multi-task Transformer to augment the modeling of eye behaviors, providing an effective complement to facial expressions. In the experiments, we introduce seven multimodal benchmark protocols for comprehensive evaluation of the EMER dataset. The results show that EMERT outperforms other state-of-the-art multimodal methods by a large margin, revealing the importance of modeling eye behaviors for robust ER. In summary, we provide a comprehensive analysis of the importance of eye behaviors in ER, advancing the study of the gap between FER and ER toward more robust ER performance.
   Submitted 19 November, 2024; v1 submitted 7 November, 2024; originally announced November 2024.
   Comments: The paper is part of ongoing work, and we request to withdraw it from arXiv to revise it further. The paper was also submitted without agreement from all co-authors.
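   To make the two components named in this abstract concrete, here is a minimal sketch of modality-adversarial feature decoupling combined with a multi-task head. All module choices and dimensions are illustrative assumptions, not the authors' released code.

   ```python
   # Minimal sketch: gradient reversal decouples modality-specific features;
   # two heads share a fused representation (multi-task). All sizes assumed.
   import torch
   import torch.nn as nn

   class GradReverse(torch.autograd.Function):
       @staticmethod
       def forward(ctx, x):
           return x.view_as(x)
       @staticmethod
       def backward(ctx, g):
           return -g  # reversed gradient: encoders learn to fool the discriminator

   class EMERTSketch(nn.Module):
       def __init__(self, dim=256, n_emotions=7):
           super().__init__()
           self.face_enc = nn.LSTM(512, dim, batch_first=True)  # facial-video features
           self.eye_enc = nn.LSTM(64, dim, batch_first=True)    # eye-movement features
           self.disc = nn.Linear(dim, 2)                        # modality discriminator
           layer = nn.TransformerEncoderLayer(d_model=dim, nhead=4, batch_first=True)
           self.fusion = nn.TransformerEncoder(layer, num_layers=2)
           self.er_head = nn.Linear(dim, n_emotions)   # emotion recognition
           self.fer_head = nn.Linear(dim, n_emotions)  # facial-expression recognition

       def forward(self, face, eye):
           f, _ = self.face_enc(face)                  # (B, T, dim)
           e, _ = self.eye_enc(eye)
           tokens = torch.cat([f, e], dim=1)
           adv = self.disc(GradReverse.apply(tokens.mean(1)))  # decoupling signal
           fused = self.fusion(tokens).mean(1)
           return self.er_head(fused), self.fer_head(fused), adv
   ```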
2. arXiv:2411.05361 (https://arxiv.org/abs/2411.05361) [cs.CL, eess.AS]
   Title: Dynamic-SUPERB Phase-2: A Collaboratively Expanding Benchmark for Measuring the Capabilities of Spoken Language Models with 180 Tasks
   Authors: Chien-yu Huang, Wei-Chih Chen, Shu-wen Yang, Andy T. Liu, Chen-An Li, Yu-Xiang Lin, Wei-Cheng Tseng, Anuj Diwan, Yi-Jen Shih, Jiatong Shi, William Chen, Xuanjun Chen, Chi-Yuan Hsiao, Puyuan Peng, Shih-Heng Wang, Chun-Yi Kuan, Ke-Han Lu, Kai-Wei Chang, Chih-Kai Yang, Fabian Ritter-Gutierrez, Ming To Chuang, Kuan-Po Huang, Siddhant Arora, You-Kuan Lin, Eunjung Yeo, et al. (53 additional authors not shown)
   Abstract: Multimodal foundation models, such as Gemini and ChatGPT, have revolutionized human-machine interactions by seamlessly integrating various forms of data. Developing a universal spoken language model that comprehends a wide range of natural language instructions is critical for bridging communication gaps and facilitating more intuitive interactions. However, the absence of a comprehensive evaluation benchmark poses a significant challenge. We present Dynamic-SUPERB Phase-2, an open and evolving benchmark for the comprehensive evaluation of instruction-based universal speech models. Building upon the first generation, this second version incorporates 125 new tasks contributed collaboratively by the global research community, expanding the benchmark to a total of 180 tasks, making it the largest benchmark for speech and audio evaluation. While the first generation of Dynamic-SUPERB was limited to classification tasks, Dynamic-SUPERB Phase-2 broadens its evaluation capabilities by introducing a wide array of novel and diverse tasks, including regression and sequence generation, across speech, music, and environmental audio. Evaluation results indicate that none of the models performed well universally: SALMONN-13B excelled in English ASR, while WavLLM demonstrated high accuracy in emotion recognition, but current models still require further innovations to handle a broader range of tasks. We will soon open-source all task data and the evaluation pipeline.
   Submitted 8 November, 2024; originally announced November 2024.
3. arXiv:2411.01766 (https://arxiv.org/abs/2411.01766) [cs.IT]
   Title: Lyapunov-guided Multi-Agent Reinforcement Learning for Delay-Sensitive Wireless Scheduling
   Authors: Cheng Zhang, Lan Wei, Ji Fan, Zening Liu, Yongming Huang
   Abstract: In this paper, a two-stage intelligent scheduler is proposed to minimize packet-level delay jitter while guaranteeing a delay bound. First, Lyapunov optimization is employed to transform the delay-violation constraint into a sequential slot-level queue stability problem. Second, a hierarchical scheme is proposed to solve the resource allocation between multiple base stations and users, where multi-agent reinforcement learning (MARL) assigns user priorities and the number of scheduled packets, while the underlying scheduler allocates the resources. Our proposed scheme achieves lower delay jitter and a lower delay violation rate than both the Round-Robin Earliest Deadline First algorithm and MARL with a delay violation penalty.
   Submitted 4 November, 2024; v1 submitted 3 November, 2024; originally announced November 2024.
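   The first stage is the classical Lyapunov drift construction. As a sketch in generic textbook notation (assumed here; the abstract does not give the paper's exact symbols), with a virtual queue Q_i(t) tracking user i's delay-violation backlog:

   ```latex
   % Quadratic Lyapunov function and conditional one-slot drift (generic form).
   L(\mathbf{Q}(t)) = \tfrac{1}{2}\sum_{i} Q_i(t)^2, \qquad
   \Delta(t) \triangleq \mathbb{E}\big[\, L(\mathbf{Q}(t+1)) - L(\mathbf{Q}(t)) \mid \mathbf{Q}(t) \,\big]
   ```

   Keeping the drift Δ(t) bounded in every slot stabilizes all virtual queues, which is the "sequential slot-level queue stability problem" that stands in for the original delay-violation constraint.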
4. arXiv:2410.15732 (https://arxiv.org/abs/2410.15732) [cs.CV]
   Title: ViMoE: An Empirical Study of Designing Vision Mixture-of-Experts
   Authors: Xumeng Han, Longhui Wei, Zhiyang Dou, Zipeng Wang, Chenhui Qiang, Xin He, Yingfei Sun, Zhenjun Han, Qi Tian
   Abstract: Mixture-of-Experts (MoE) models embody the divide-and-conquer concept and are a promising approach for increasing model capacity, demonstrating excellent scalability across multiple domains. In this paper, we integrate the MoE structure into the classic Vision Transformer (ViT), naming it ViMoE, and explore the potential of applying MoE to vision through a comprehensive study on image classification. However, we observe that performance is sensitive to the configuration of the MoE layers, making it challenging to obtain optimal results without careful design. The underlying cause is that inappropriate MoE layers lead to unreliable routing and hinder the experts from effectively acquiring helpful knowledge. To address this, we introduce a shared expert to learn and capture common information, which serves as an effective way to construct a stable ViMoE. Furthermore, we demonstrate how to analyze expert routing behavior, revealing which MoE layers are capable of specializing in specific information and which are not. This provides guidance for retaining the critical layers while removing redundancies, advancing ViMoE to be more efficient without sacrificing accuracy. We hope this work offers new insights into the design of vision MoE models and provides valuable empirical guidance for future research.
   Submitted 21 October, 2024; originally announced October 2024.
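   The "shared expert" reads as an always-active expert added alongside the routed ones. A minimal sketch, with all sizes and the top-1 routing rule chosen for illustration (the paper's actual configuration may differ):

   ```python
   # Sketch of an MoE block with an always-active shared expert, the
   # stabilizing ingredient named in the abstract. Sizes are assumptions.
   import torch
   import torch.nn as nn

   class MoEWithSharedExpert(nn.Module):
       def __init__(self, dim=384, n_experts=4, hidden=1536):
           super().__init__()
           make = lambda: nn.Sequential(nn.Linear(dim, hidden), nn.GELU(),
                                        nn.Linear(hidden, dim))
           self.experts = nn.ModuleList(make() for _ in range(n_experts))
           self.shared = make()                 # always on: captures common information
           self.router = nn.Linear(dim, n_experts)

       def forward(self, x):                    # x: (n_tokens, dim)
           gates = self.router(x).softmax(-1)
           top_w, top_i = gates.max(-1)         # top-1 routing per token
           routed = torch.stack([self.experts[int(i)](t) for t, i in zip(x, top_i)])
           return self.shared(x) + top_w.unsqueeze(-1) * routed
   ```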
5. arXiv:2410.13109 (https://arxiv.org/abs/2410.13109) [stat.ML, cs.LG]
   Title: Contextual Bandits with Arm Request Costs and Delays
   Authors: Lai Wei, Ambuj Tewari, Michael A. Cianfrocco
   Abstract: We introduce a novel extension of the contextual bandit problem, where new sets of arms can be requested with stochastic time delays and associated costs. In this setting, the learner can select multiple arms from a decision set, with each selection taking one unit of time. The problem is framed as a special case of semi-Markov decision processes (SMDPs). The arm contexts, request times, and costs are assumed to follow an unknown distribution. We consider the regret of an online learning algorithm with respect to the optimal policy that achieves the maximum average reward. By leveraging the Bellman optimality equation, we design algorithms that can effectively select arms and determine the appropriate time to request new arms, thereby minimizing their regret. Under the realizability assumption, we analyze the proposed algorithms and demonstrate that their regret upper bounds align with established results in the contextual bandit literature. We validate the algorithms through experiments on simulated data and a movie recommendation dataset, showing that their performance is consistent with the theoretical analyses.
   Submitted 18 October, 2024; v1 submitted 16 October, 2024; originally announced October 2024.
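   For context on the benchmark used here: in average-reward settings, regret is measured against the optimal gain ρ*. In generic notation (assumed, not taken from the paper), with r_t the net reward at step t, including any arm-request costs:

   ```latex
   % Average-reward regret against the optimal gain rho^* (generic form).
   \rho^{*} = \max_{\pi}\ \liminf_{T\to\infty}\ \frac{1}{T}\,
   \mathbb{E}_{\pi}\Big[\sum_{t=1}^{T} r_t\Big],
   \qquad
   R(T) = T\,\rho^{*} - \mathbb{E}\Big[\sum_{t=1}^{T} r_t\Big]
   ```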
6. arXiv:2410.10652 (https://arxiv.org/abs/2410.10652) [q-bio.QM, cs.LG]
   Title: QueST: Querying Functional and Structural Niches on Spatial Transcriptomics Data via Contrastive Subgraph Embedding
   Authors: Mo Chen, Minsheng Hao, Xuegong Zhang, Lei Wei
   Abstract: The functional or structural spatial regions within tissues, referred to as spatial niches, are elements for illustrating the spatial contexts of multicellular organisms. A key challenge is querying shared niches across diverse tissues, which is crucial for achieving a comprehensive understanding of the organization and phenotypes of cell populations. However, current data analysis methods predominantly focus on creating spatial-aware embeddings for cells, neglecting the development of niche-level representations for effective querying. To address this gap, we introduce QueST, a novel niche representation learning model designed for querying spatial niches across multiple samples. QueST utilizes a novel subgraph contrastive learning approach to explicitly capture niche-level characteristics and incorporates adversarial training to mitigate batch effects. We evaluate QueST on established benchmarks using human and mouse datasets, demonstrating its superiority over state-of-the-art graph representation learning methods in accurate niche queries. Overall, QueST offers a specialized model for spatial niche queries, paving the way for deeper insights into the patterns and mechanisms of cell spatial organization across tissues. Source code can be found at https://github.com/cmhimself/QueST.
   Submitted 14 October, 2024; originally announced October 2024.
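   A plausible reading of "subgraph contrastive learning" is an InfoNCE objective over paired niche-subgraph embeddings. The sketch below is illustrative only; the actual objective is in the repository linked above.

   ```python
   # InfoNCE over niche-subgraph embeddings (illustrative reading).
   import torch
   import torch.nn.functional as F

   def subgraph_infonce(z1, z2, tau=0.2):
       """z1, z2: (n_niches, d) embeddings of two views of the same niches;
       matching rows are positive pairs, all other rows are negatives."""
       z1, z2 = F.normalize(z1, dim=-1), F.normalize(z2, dim=-1)
       logits = z1 @ z2.t() / tau                # scaled cosine similarities
       labels = torch.arange(z1.size(0))         # positives sit on the diagonal
       return F.cross_entropy(logits, labels)
   ```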
7. arXiv:2410.07138 (https://arxiv.org/abs/2410.07138) [q-bio.NC, cs.LG, stat.AP]
   Title: Diagnosis and Pathogenic Analysis of Autism Spectrum Disorder Using Fused Brain Connection Graph
   Authors: Lu Wei, Yi Huang, Guosheng Yin, Fode Zhang, Manxue Zhang, Bin Liu
   Abstract: We propose a model for diagnosing autism spectrum disorder (ASD) using multimodal magnetic resonance imaging (MRI) data. Our approach integrates brain connectivity data from diffusion tensor imaging (DTI) and functional MRI (fMRI), employing graph neural networks (GNNs) for fused graph classification. To improve diagnostic accuracy, we introduce a loss function that maximizes inter-class margins and minimizes intra-class margins. We also analyze network node centrality, calculating degree, subgraph, and eigenvector centralities on a bimodal fused brain graph to identify pathological regions linked to ASD. Two non-parametric tests assess the statistical significance of these centralities between ASD patients and healthy controls. Our results reveal consistency between the tests, yet the identified regions differ significantly across centralities, suggesting distinct physiological interpretations. These findings enhance our understanding of ASD's neurobiological basis and offer new directions for clinical diagnosis.
   Submitted 21 September, 2024; originally announced October 2024.
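   The three centralities named in this abstract are standard graph statistics; a sketch with NetworkX plus a non-parametric group test (the fused-graph input and the choice of Mann-Whitney U test are assumptions for illustration):

   ```python
   # Node centralities on a fused brain graph, then a group comparison.
   import networkx as nx
   from scipy.stats import mannwhitneyu

   def region_centralities(fused_adj):
       """fused_adj: weighted adjacency matrix of the DTI+fMRI fused graph."""
       G = nx.from_numpy_array(fused_adj)
       return (nx.degree_centrality(G),
               nx.subgraph_centrality(G),
               nx.eigenvector_centrality_numpy(G, weight="weight"))

   # e.g., compare one region's centrality between groups:
   # stat, p = mannwhitneyu(asd_values, control_values)
   ```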
8. arXiv:2410.04521 (https://arxiv.org/abs/2410.04521) [cs.CV]
   Title: MC-CoT: A Modular Collaborative CoT Framework for Zero-shot Medical-VQA with LLM and MLLM Integration
   Authors: Lai Wei, Wenkai Wang, Xiaoyu Shen, Yu Xie, Zhihao Fan, Xiaojin Zhang, Zhongyu Wei, Wei Chen
   Abstract: In recent advancements, multimodal large language models (MLLMs) have been fine-tuned on specific medical image datasets to address medical visual question answering (Med-VQA) tasks. However, this common approach of task-specific fine-tuning is costly and necessitates separate models for each downstream task, limiting the exploration of zero-shot capabilities. In this paper, we introduce MC-CoT, a modular cross-modal collaboration Chain-of-Thought (CoT) framework designed to enhance the zero-shot performance of MLLMs in Med-VQA by leveraging large language models (LLMs). MC-CoT improves reasoning and information extraction by integrating medical knowledge and task-specific guidance: the LLM provides complex medical reasoning chains, while the MLLM provides observations of medical images based on the LLM's instructions. Our experiments on datasets such as SLAKE, VQA-RAD, and PATH-VQA show that MC-CoT surpasses standalone MLLMs and various multimodal CoT frameworks in recall rate and accuracy. These findings highlight the importance of incorporating background information and detailed guidance in addressing complex zero-shot Med-VQA tasks.
   Submitted 6 October, 2024; originally announced October 2024.
   Comments: 21 pages, 14 figures, 6 tables
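   The LLM/MLLM collaboration loop described above reduces to a few lines of schematic Python. Here `call_llm` and `call_mllm` are placeholders for text-only and vision-language endpoints, and the prompts are invented for illustration, not the paper's templates.

   ```python
   # Schematic MC-CoT-style loop: LLM plans, MLLM observes, LLM answers.
   def call_llm(prompt: str) -> str:            # placeholder text-only LLM
       raise NotImplementedError

   def call_mllm(image, prompt: str) -> str:    # placeholder vision-language model
       raise NotImplementedError

   def mc_cot_answer(question: str, image) -> str:
       plan = call_llm("Give the step-by-step medical reasoning needed "
                       f"to answer: {question}")
       observations = call_mllm(image, "Following these instructions, "
                                f"describe the image: {plan}")
       return call_llm(f"Question: {question}\nReasoning plan: {plan}\n"
                       f"Image observations: {observations}\n"
                       "Give the final answer.")
   ```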
9. arXiv:2409.18839 (https://arxiv.org/abs/2409.18839) [cs.CV]
   Title: MinerU: An Open-Source Solution for Precise Document Content Extraction
   Authors: Bin Wang, Chao Xu, Xiaomeng Zhao, Linke Ouyang, Fan Wu, Zhiyuan Zhao, Rui Xu, Kaiwen Liu, Yuan Qu, Fukai Shang, Bo Zhang, Liqun Wei, Zhihao Sui, Wei Li, Botian Shi, Yu Qiao, Dahua Lin, Conghui He
   Abstract: Document content analysis has been a crucial research area in computer vision. Despite significant advancements in methods such as OCR, layout detection, and formula recognition, existing open-source solutions struggle to consistently deliver high-quality content extraction due to the diversity of document types and content. To address these challenges, we present MinerU, an open-source solution for high-precision document content extraction. MinerU leverages the sophisticated PDF-Extract-Kit models to extract content from diverse documents effectively and employs finely tuned preprocessing and postprocessing rules to ensure the accuracy of the final results. Experimental results demonstrate that MinerU consistently achieves high performance across various document types, significantly enhancing the quality and consistency of content extraction. The MinerU open-source project is available at https://github.com/opendatalab/MinerU.
   Submitted 27 September, 2024; originally announced September 2024.
   Comments: MinerU Technical Report
10. arXiv:2409.14556 (https://arxiv.org/abs/2409.14556) [cs.DB, cs.AI]
   Title: RACOON: An LLM-based Framework for Retrieval-Augmented Column Type Annotation with a Knowledge Graph
   Authors: Lindsey Linxi Wei, Guorui Xiao, Magdalena Balazinska
   Abstract: As an important component of data exploration and integration, Column Type Annotation (CTA) aims to label the columns of a table with one or more semantic types. With the recent development of Large Language Models (LLMs), researchers have started to explore the possibility of using LLMs for CTA, leveraging their strong zero-shot capabilities. In this paper, we build on this promising work and improve on LLM-based methods for CTA by showing how to use a Knowledge Graph (KG) to augment the context information provided to the LLM. Our approach, called RACOON, combines both pre-trained parametric and non-parametric knowledge during generation to improve LLMs' performance on CTA. Our experiments show that RACOON achieves up to a 0.21 micro-F1 improvement over vanilla LLM inference.
   Submitted 31 October, 2024; v1 submitted 22 September, 2024; originally announced September 2024.
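   A KG-augmented prompt for column type annotation, in the spirit of the entry above, might look like the sketch below. `kg_lookup`, `call_llm`, and the prompt format are hypothetical placeholders, not RACOON's actual interface.

   ```python
   # Sketch: retrieve KG facts about sample values (non-parametric context)
   # and let the LLM pick a type (parametric knowledge).
   def kg_lookup(value: str) -> str:            # placeholder KG retrieval
       raise NotImplementedError

   def call_llm(prompt: str) -> str:            # placeholder LLM call
       raise NotImplementedError

   def annotate_column(values, candidate_types):
       facts = [kg_lookup(v) for v in values[:5]]
       prompt = ("Column values: " + ", ".join(values[:5]) + "\n"
                 "Knowledge-graph facts: " + "; ".join(facts) + "\n"
                 "Choose the best semantic type from: "
                 + ", ".join(candidate_types))
       return call_llm(prompt)
   ```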
11. arXiv:2409.13286 (https://arxiv.org/abs/2409.13286) [cs.IT, eess.SP]
   Title: Generative Learning Powered Probing Beam Optimization for Cell-Free Hybrid Beamforming
   Authors: Cheng Zhang, Shuangbo Xiong, Mengqing He, Lan Wei, Yongming Huang, Wei Zhang
   Abstract: Probing beam measurement (PBM)-based hybrid beamforming provides a feasible solution for cell-free MIMO. In this letter, we propose a novel probing beam optimization framework in which three collaborative modules respectively realize PBM augmentation, sum-rate prediction, and probing beam optimization. Specifically, the PBM augmentation model integrates a conditional variational auto-encoder (CVAE) and mixture density networks, and adopts a correlated PBM distribution with full covariance, for which a Cholesky-decomposition-based training procedure is introduced to address the issues of covariance legality and numerical stability. Simulations verify that the proposed augmentation model outperforms the traditional CVAE and that the proposed optimization framework is efficient.
   Submitted 20 September, 2024; originally announced September 2024.
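   On why Cholesky-based training helps: predicting a lower-triangular factor L with a positive diagonal guarantees a legal (positive-definite) covariance Σ = LLᵀ and avoids numerically ill-conditioned direct covariance predictions. A minimal sketch, with shapes and the softplus floor chosen for illustration:

   ```python
   # Cholesky parameterization of a full-covariance Gaussian head.
   import torch
   import torch.nn.functional as F

   def build_cholesky(raw, dim):
       """raw: (batch, dim*(dim+1)//2) unconstrained network outputs."""
       L = torch.zeros(raw.size(0), dim, dim)
       rows, cols = torch.tril_indices(dim, dim)
       L[:, rows, cols] = raw                      # fill the lower triangle
       d = torch.arange(dim)
       L[:, d, d] = F.softplus(L[:, d, d]) + 1e-6  # enforce positive diagonal
       return L                                    # covariance: L @ L.transpose(-1, -2)
   ```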
Specifically, the PBM augmentation model integrates the conditional variational auto-encoder (CVAE) and mixture density networks and adopts correlated PBM distribution with full-covariance, for which a Cholesky-decomposition based training is introduced to address the issues of covariance legality and numerical stability. Simulations verify the better performance of the proposed augmentation model compared to the traditional CVAE and the efficiency of proposed optimization framework. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.13286v1-abstract-full').style.display = 'none'; document.getElementById('2409.13286v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.13191">arXiv:2409.13191</a> <span> [<a href="https://arxiv.org/pdf/2409.13191">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> An adapted large language model facilitates multiple medical tasks in diabetes care </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wei%2C+L">Lai Wei</a>, <a href="/search/cs?searchtype=author&query=Ying%2C+Z">Zhen Ying</a>, <a href="/search/cs?searchtype=author&query=He%2C+M">Muyang He</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yutong Chen</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Q">Qian Yang</a>, <a href="/search/cs?searchtype=author&query=Hong%2C+Y">Yanzhe Hong</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+J">Jiaping Lu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiaoying Li</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+W">Weiran Huang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Ying Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.13191v1-abstract-short" style="display: inline;"> Diabetes is a chronic disease that poses a significant global health burden, and optimizing diabetes management requires multi-stakeholder collaboration. Large language models (LLMs) have shown promise in various healthcare scenarios, but their effectiveness across a diverse range of diabetes tasks remains unproven. 
arXiv:2409.13191 (https://arxiv.org/abs/2409.13191) [cs.CL, cs.AI, cs.CE, cs.LG]
An adapted large language model facilitates multiple medical tasks in diabetes care
Authors: Lai Wei, Zhen Ying, Muyang He, Yutong Chen, Qian Yang, Yanzhe Hong, Jiaping Lu, Xiaoying Li, Weiran Huang, Ying Chen
Abstract: Diabetes is a chronic disease that poses a significant global health burden, and optimizing diabetes management requires multi-stakeholder collaboration. Large language models (LLMs) have shown promise in various healthcare scenarios, but their effectiveness across a diverse range of diabetes tasks remains unproven. In this study, we introduced a framework to train and validate diabetes-specific LLMs. We first developed a comprehensive data processing pipeline that includes data collection, filtering, augmentation, and refinement. This approach contributes to creating a high-quality, diabetes-specific dataset and several evaluation benchmarks built entirely from scratch. Utilizing the collected training dataset, we fine-tuned a diabetes-specific LLM family that demonstrated state-of-the-art proficiency in understanding and processing various diabetes tasks compared with other LLMs. Furthermore, clinical studies showed the potential applications of our models in diabetes care, including providing personalized healthcare, assisting medical education, and streamlining clinical tasks. In conclusion, our study introduced a framework to develop and evaluate a diabetes-specific LLM family, and highlighted its potential to enhance clinical practice and provide personalized, data-driven support for diabetes care across different end users. The code is available on GitHub at https://github.com/waltonfuture/Diabetica.
Submitted 19 September, 2024; originally announced September 2024.
arXiv:2409.06679 (https://arxiv.org/abs/2409.06679) [cs.CL]
E2LLM: Encoder Elongated Large Language Models for Long-Context Understanding and Reasoning
Authors: Zihan Liao, Jun Wang, Hang Yu, Lingxiao Wei, Jianguo Li, Jun Wang, Wei Zhang
Abstract: In the realm of Large Language Models (LLMs), the ability to process long contexts is increasingly crucial for tasks such as multi-round dialogues, code generation, and document summarization. This paper addresses the challenges of enhancing long-context performance, reducing computational complexity, and leveraging pretrained models, collectively termed the "impossible triangle." We introduce E2LLM (Encoder Elongated Large Language Models), a novel approach that effectively navigates this paradox. The method involves splitting long contexts into chunks, compressing each into embedding vectors via a pretrained text encoder, and utilizing an adapter to align these representations with a decoder-only LLM. Two training objectives, focusing on reconstruction of the encoder output and long-context instruction fine-tuning, are employed to facilitate the LLM's understanding of the soft prompts. Experimental results demonstrate that E2LLM achieves superior performance in long-context scenarios while balancing efficiency, performance, and compatibility with pretrained models. Our framework thus represents a significant advancement in the field, contributing to effective long-text modeling.
Submitted 10 September, 2024; originally announced September 2024.
Comments: 12 pages, 4 figures
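The chunk-compress-align recipe summarized above can be sketched in a few lines; here a mean-pool stands in for the pretrained text encoder, and the sizes and module names are assumptions rather than E2LLM's actual architecture.

```python
# Minimal sketch of the E2LLM recipe as summarized above: split a long
# context into chunks, compress each chunk into one embedding, and map the
# embeddings through an adapter into the decoder's input space as soft prompts.
import torch
import torch.nn as nn

class ChunkCompressor(nn.Module):
    def __init__(self, enc_dim=384, dec_dim=1024, chunk_len=128):
        super().__init__()
        self.chunk_len = chunk_len
        self.adapter = nn.Linear(enc_dim, dec_dim)  # aligns encoder and decoder spaces

    def forward(self, token_embs):                  # (seq_len, enc_dim)
        chunks = token_embs.split(self.chunk_len, dim=0)
        pooled = torch.stack([c.mean(dim=0) for c in chunks])  # one vector per chunk
        return self.adapter(pooled)                 # (num_chunks, dec_dim) soft prompts

soft_prompts = ChunkCompressor()(torch.randn(1000, 384))
print(soft_prompts.shape)  # torch.Size([8, 1024]): 1000 tokens -> 8 soft prompts
```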
arXiv:2409.00649 (https://arxiv.org/abs/2409.00649) [eess.IV, cs.CV]
DeReStainer: H&E to IHC Pathological Image Translation via Decoupled Staining Channels
Authors: Linda Wei, Shengyi Hua, Shaoting Zhang, Xiaofan Zhang
Abstract: Breast cancer is among the most fatal cancers in women, and early detection is crucial for treatment. HER2 status, a valuable diagnostic marker based on Immunohistochemistry (IHC) staining, is instrumental in determining breast cancer status. The high cost of IHC staining and the ubiquity of Hematoxylin and Eosin (H&E) staining make the conversion from H&E to IHC staining essential. In this article, we propose a destain-restain framework for converting H&E staining to IHC staining, leveraging the characteristic that H&E staining and IHC staining of the same tissue sections share the Hematoxylin channel. We further design loss functions specifically for the Hematoxylin and Diaminobenzidine (DAB) channels to generate IHC images, exploiting insights from the separated staining channels. Beyond the benchmark metrics of the BCI contest, we develop semantic information metrics for the HER2 level. Experimental results demonstrate that our method outperforms previous open-source methods in terms of intrinsic image properties and semantic information.
Submitted 1 September, 2024; originally announced September 2024.
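To make the per-channel supervision idea concrete, the sketch below unmixes RGB pathology images into Hematoxylin and DAB stain channels via standard optical-density unmixing and scores each channel separately. The stain matrix uses commonly cited absorbance values; the loss form is a plain L1 stand-in, not the paper's actual losses.

```python
# Sketch of per-channel supervision: score a generated IHC image separately
# on the Hematoxylin and DAB stain channels after linear stain unmixing.
import numpy as np

STAIN_MATRIX = np.array([[0.65, 0.70, 0.29],    # Hematoxylin RGB absorbance (typical)
                         [0.27, 0.57, 0.78]])   # DAB RGB absorbance (typical)

def stain_channels(rgb):
    """Project optical density onto the H and DAB directions (least squares)."""
    od = -np.log(np.clip(rgb, 1e-6, 1.0))        # Beer-Lambert optical density
    flat = od.reshape(-1, 3)
    coeffs, *_ = np.linalg.lstsq(STAIN_MATRIX.T, flat.T, rcond=None)
    return coeffs.T.reshape(rgb.shape[:2] + (2,))

def channel_loss(fake_rgb, real_rgb):
    f, r = stain_channels(fake_rgb), stain_channels(real_rgb)
    h_loss = np.abs(f[..., 0] - r[..., 0]).mean()
    dab_loss = np.abs(f[..., 1] - r[..., 1]).mean()
    return h_loss, dab_loss

rng = np.random.default_rng(0)
print(channel_loss(rng.uniform(0.1, 1, (32, 32, 3)), rng.uniform(0.1, 1, (32, 32, 3))))
```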
arXiv:2408.14678 (https://arxiv.org/abs/2408.14678) [cs.IR, cs.AI, cs.LG]
Bridging the Gap: Unpacking the Hidden Challenges in Knowledge Distillation for Online Ranking Systems
Authors: Nikhil Khani, Shuo Yang, Aniruddh Nath, Yang Liu, Pendo Abbo, Li Wei, Shawn Andrews, Maciej Kula, Jarrod Kahn, Zhe Zhao, Lichan Hong, Ed Chi
Abstract: Knowledge Distillation (KD) is a powerful approach for compressing a large model into a smaller, more efficient model, particularly beneficial for latency-sensitive applications like recommender systems. However, current KD research predominantly focuses on Computer Vision (CV) and NLP tasks, overlooking unique data characteristics and challenges inherent to recommender systems. This paper addresses these overlooked challenges, specifically: (1) mitigating data distribution shifts between teacher and student models, (2) efficiently identifying optimal teacher configurations within time and budgetary constraints, and (3) enabling computationally efficient and rapid sharing of teacher labels to support multiple students. We present a robust KD system developed and rigorously evaluated on multiple large-scale personalized video recommendation systems within Google. Our live experiment results demonstrate significant improvements in student model performance while ensuring the consistent and reliable generation of high-quality teacher labels from a continuous data stream.
Submitted 26 August, 2024; originally announced August 2024.
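For readers unfamiliar with KD itself, the textbook objective that such systems build on combines a hard-label term with a term matching the teacher's softened predictions. This is a generic illustration; the paper's production losses, shift handling, and label-sharing pipeline are not shown.

```python
# Textbook knowledge-distillation loss for a binary ranking signal:
# hard labels plus temperature-softened teacher targets.
import torch
import torch.nn.functional as F

def kd_loss(student_logits, teacher_logits, labels, T=2.0, alpha=0.5):
    hard = F.binary_cross_entropy_with_logits(student_logits, labels)
    soft = F.binary_cross_entropy_with_logits(student_logits / T,
                                              torch.sigmoid(teacher_logits / T))
    return alpha * hard + (1 - alpha) * soft

s, t = torch.randn(8), torch.randn(8)
y = torch.randint(0, 2, (8,)).float()
print(kd_loss(s, t, y))
```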
arXiv:2408.12171 (https://arxiv.org/abs/2408.12171) [cs.LG]
Recent Advances on Machine Learning for Computational Fluid Dynamics: A Survey
Authors: Haixin Wang, Yadi Cao, Zijie Huang, Yuxuan Liu, Peiyan Hu, Xiao Luo, Zezheng Song, Wanjia Zhao, Jilin Liu, Jinan Sun, Shikun Zhang, Long Wei, Yue Wang, Tailin Wu, Zhi-Ming Ma, Yizhou Sun
Abstract: This paper explores recent advancements in enhancing Computational Fluid Dynamics (CFD) tasks through Machine Learning (ML) techniques. We begin by introducing fundamental concepts, traditional methods, and benchmark datasets, then examine the various roles ML plays in improving CFD. The survey systematically reviews papers from the past five years and introduces a novel classification for forward modeling: Data-driven Surrogates, Physics-Informed Surrogates, and ML-assisted Numerical Solutions. Furthermore, we review the latest ML methods in inverse design and control, offering a novel classification and an in-depth discussion. We then highlight real-world applications of ML for CFD in critical scientific and engineering disciplines, including aerodynamics, combustion, atmosphere and ocean science, biological fluids, plasma, symbolic regression, and reduced-order modeling. We also identify key challenges and advocate future research directions to address them, such as multi-scale representation, physical knowledge encoding, scientific foundation models, and automatic scientific discovery. This review serves as a guide for the rapidly expanding ML-for-CFD community, aiming to inspire insights for future advancements. We conclude that ML is poised to significantly transform CFD research by enhancing simulation accuracy, reducing computational time, and enabling more complex analyses of fluid dynamics. The paper resources can be viewed at https://github.com/WillDreamer/Awesome-AI4CFD.
Submitted 22 August, 2024; originally announced August 2024.
Comments: 22 pages, 6 figures
arXiv:2408.12169 (https://arxiv.org/abs/2408.12169) [cs.HC]
ReorderBench: A Benchmark for Matrix Reordering
Authors: Jiangning Zhu, Zheng Wang, Zhiyang Shen, Lai Wei, Fengyuan Tian, Mengchen Liu, Shixia Liu
Abstract: Matrix reordering permutes the rows and columns of a matrix to reveal meaningful visual patterns, such as blocks that represent clusters. A comprehensive collection of matrices, along with a scoring method for measuring the quality of visual patterns in these matrices, contributes to building a benchmark, which is essential for selecting or designing suitable reordering algorithms for specific tasks. In this paper, we build a matrix reordering benchmark, ReorderBench, with the goal of evaluating and improving matrix reordering techniques. This is achieved by generating a large set of representative and diverse matrices and scoring these matrices with a convolution- and entropy-based method. Our benchmark contains 2,835,000 binary matrices and 5,670,000 continuous matrices, each featuring one of four visual patterns: block, off-diagonal block, star, or band. We demonstrate the usefulness of ReorderBench through three main applications in matrix reordering: 1) evaluating different reordering algorithms, 2) creating a unified scoring model to measure the visual patterns in any matrix, and 3) developing a deep learning model for matrix reordering.
Submitted 22 August, 2024; originally announced August 2024.
Comments: Submitted to IEEE TVCG
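A toy version of a convolution- and entropy-based score can convey the flavor: smooth the matrix with a small averaging kernel and reward crisp (low-entropy) responses. ReorderBench's actual scoring pipeline is more elaborate; this stand-in is illustrative only.

```python
# Toy convolution + entropy score: a crisp block pattern scores higher
# than unstructured noise.
import numpy as np

def pattern_score(m, k=3):
    pad = k // 2
    padded = np.pad(m.astype(float), pad, mode="edge")
    smooth = np.zeros_like(m, dtype=float)
    for i in range(m.shape[0]):
        for j in range(m.shape[1]):
            smooth[i, j] = padded[i:i + k, j:j + k].mean()   # box convolution
    p = np.clip(smooth, 1e-6, 1 - 1e-6)
    entropy = -(p * np.log(p) + (1 - p) * np.log(1 - p)).mean()
    return -entropy        # crisper visual structure -> higher score

block = np.kron(np.eye(2), np.ones((4, 4)))        # ideal block pattern
noise = (np.random.default_rng(0).random((8, 8)) > 0.5).astype(float)
print(pattern_score(block), pattern_score(noise))  # block should score higher
```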
arXiv:2408.08070 (https://arxiv.org/abs/2408.08070) [cs.CV]
MambaMIM: Pre-training Mamba with State Space Token-interpolation
Authors: Fenghe Tang, Bingkun Nian, Yingtai Li, Jie Yang, Liu Wei, S. Kevin Zhou
Abstract: Generative self-supervised learning demonstrates outstanding representation learning capabilities in both Convolutional Neural Networks (CNNs) and Vision Transformers (ViTs). However, there are currently no generative pre-training methods for selective state space models (Mamba) that can handle long-range dependencies effectively. To address this challenge, we introduce a generative self-supervised learning method for Mamba (MambaMIM) based on Selective Structure State Space Sequence Token-interpolation (S6T), a general-purpose pre-training method for arbitrary Mamba architectures. MambaMIM incorporates a bottom-up 3D hybrid masking strategy in the encoder to maintain masking consistency across different architectures. Additionally, S6T is employed to learn causal relationships between the masked sequences in the state space. MambaMIM can be used on any single or hybrid Mamba architecture to enhance Mamba's long-range representation capability. Extensive downstream experiments demonstrate the feasibility and advantages of using Mamba for medical image pre-training tasks. The code is available at https://github.com/FengheTan9/MambaMIM.
Submitted 15 August, 2024; originally announced August 2024.
Comments: 10 pages, 7 figures
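As loose intuition for masked-sequence interpolation, the sketch below hides a subset of token states and fills each masked position by linearly interpolating between its nearest visible neighbors. S6T operates in the selective state space, so treat this purely as a simplified analogy, not the paper's method.

```python
# Highly simplified masked-token interpolation over a 1-D sequence.
import numpy as np

def interpolate_masked(states, mask):
    """states: (seq, dim); mask: True where the token is hidden."""
    out = states.copy()
    visible = np.flatnonzero(~mask)
    for i in np.flatnonzero(mask):
        left = visible[visible < i]
        right = visible[visible > i]
        if len(left) and len(right):
            l, r = left[-1], right[0]
            w = (i - l) / (r - l)
            out[i] = (1 - w) * states[l] + w * states[r]
        else:                        # masked run at a boundary: copy nearest
            out[i] = states[left[-1] if len(left) else right[0]]
    return out

rng = np.random.default_rng(0)
x = rng.normal(size=(10, 4))
m = rng.random(10) < 0.4
print(np.allclose(interpolate_masked(x, m)[~m], x[~m]))  # visible tokens unchanged
```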
arXiv:2408.06512 (https://arxiv.org/abs/2408.06512) [cs.LG, cs.AI, cs.IR]
Learned Ranking Function: From Short-term Behavior Predictions to Long-term User Satisfaction
Authors: Yi Wu, Daryl Chang, Jennifer She, Zhe Zhao, Li Wei, Lukasz Heldt
Abstract: We present the Learned Ranking Function (LRF), a system that takes short-term user-item behavior predictions as input and outputs a slate of recommendations that directly optimizes for long-term user satisfaction. Most previous work is based on optimizing the hyperparameters of a heuristic function. We propose to model the problem directly as a slate optimization problem with the objective of maximizing long-term user satisfaction. We also develop a novel constraint optimization algorithm that stabilizes objective trade-offs for multi-objective optimization. We evaluate our approach with live experiments and describe its deployment on YouTube.
Submitted 12 August, 2024; originally announced August 2024.
Comments: RecSys 24
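At its simplest, the input-output contract described above maps per-item behavior predictions to a slate. The toy below combines predictions with trade-off weights and takes the top-k; LRF learns this mapping under constraints, whereas the weights and behaviors here are invented for illustration.

```python
# Toy slate construction from short-term behavior predictions.
import numpy as np

def build_slate(pred, weights, k=3):
    """pred: (items, behaviors) predicted probabilities; returns item indices."""
    scores = pred @ weights               # weighted long-term value proxy
    return np.argsort(-scores)[:k]

pred = np.array([[0.9, 0.1],    # [p(click), p(dismiss)] per candidate, hypothetical
                 [0.6, 0.0],
                 [0.8, 0.7],
                 [0.3, 0.1]])
weights = np.array([1.0, -0.5])  # dismissals count against satisfaction
print(build_slate(pred, weights))
```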
arXiv:2408.05719 (https://arxiv.org/abs/2408.05719) [cs.RO, eess.SP]
MR-ULINS: A Tightly-Coupled UWB-LiDAR-Inertial Estimator with Multi-Epoch Outlier Rejection
Authors: Tisheng Zhang, Man Yuan, Linfu Wei, Yan Wang, Hailiang Tang, Xiaoji Niu
Abstract: LiDAR-inertial odometry (LIO) and ultra-wideband (UWB) positioning have been integrated to achieve drift-free positioning in global navigation satellite system (GNSS)-denied environments. However, UWB may be affected by systematic range errors (such as clock drift and antenna phase center offset) and non-line-of-sight (NLOS) signals, resulting in reduced robustness. In this study, we propose a UWB-LiDAR-inertial estimator (MR-ULINS) that tightly integrates UWB range, LiDAR frame-to-frame, and IMU measurements within the multi-state constraint Kalman filter (MSCKF) framework. The systematic range errors are precisely modeled so that they can be estimated and compensated online. In addition, we propose a multi-epoch outlier rejection algorithm for UWB NLOS that exploits the relative accuracy of the LIO: the relative trajectory of the LIO is employed to verify the consistency of all range measurements within the sliding window. Extensive experimental results demonstrate that MR-ULINS achieves a positioning accuracy of around 0.1 m in complex indoor environments with severe NLOS interference. Ablation experiments show that the online estimation and multi-epoch outlier rejection effectively improve positioning accuracy. Moreover, MR-ULINS maintains high accuracy and robustness in LiDAR-degenerated scenes and UWB-challenging conditions with sparse base stations.
Submitted 11 August, 2024; originally announced August 2024.
Comments: 8 pages, 9 figures
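The consistency check sketched below conveys the multi-epoch idea: within a window, fit one anchor position that best explains all UWB ranges given the locally accurate relative LIO poses, then flag ranges with large residuals as NLOS. The grid search, 2-D geometry, and threshold are invented stand-ins for the paper's estimator.

```python
# Toy multi-epoch UWB outlier rejection using relative LIO poses.
import numpy as np

def flag_uwb_outliers(positions, ranges, thresh=0.5):
    """positions: (n, 2) relative LIO poses; ranges: (n,) measured distances."""
    # coarse grid search for the anchor that best explains the window
    grid = np.mgrid[-10:10:0.25, -10:10:0.25].reshape(2, -1).T
    residual_sets = np.abs(
        np.linalg.norm(grid[:, None, :] - positions[None], axis=2) - ranges)
    best = residual_sets[residual_sets.sum(axis=1).argmin()]
    return best > thresh                         # True where inconsistent (NLOS)

rng = np.random.default_rng(0)
traj = np.cumsum(rng.normal(0, 0.3, (12, 2)), axis=0)   # relative trajectory
anchor = np.array([4.0, -2.0])
r = np.linalg.norm(traj - anchor, axis=1) + rng.normal(0, 0.03, 12)
r[5] += 3.0                                      # inject one NLOS-style bias
print(flag_uwb_outliers(traj, r))                # index 5 should be flagged
```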
arXiv:2408.03124 (https://arxiv.org/abs/2408.03124) [eess.SY, cs.LG]
Closed-loop Diffusion Control of Complex Physical Systems
Authors: Long Wei, Haodong Feng, Yuchen Yang, Ruiqi Feng, Peiyan Hu, Xiang Zheng, Tao Zhang, Dixia Fan, Tailin Wu
Abstract: The control problems of complex physical systems have broad applications in science and engineering. Previous studies have shown that generative control methods based on diffusion models offer significant advantages for solving these problems. However, existing generative control approaches face challenges in both performance and efficiency when extended to the closed-loop setting, which is essential for effective control. In this paper, we propose an efficient Closed-Loop Diffusion method for Physical systems Control (CL-DiffPhyCon). By employing an asynchronous denoising framework across physical time steps, CL-DiffPhyCon generates control signals conditioned on real-time feedback from the environment with significantly reduced computational cost during sampling. The control process can be further accelerated by incorporating fast sampling techniques such as DDIM. We evaluate CL-DiffPhyCon on two tasks: 1D Burgers' equation control and 2D incompressible fluid control. The results demonstrate that CL-DiffPhyCon achieves superior control performance with significant improvements in sampling efficiency.
Submitted 2 October, 2024; v1 submitted 31 July, 2024; originally announced August 2024.
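Schematically, a closed-loop generative controller of this kind keeps a horizon of partially refined control candidates, refines them a little per physical step conditioned on the latest observation, and emits the first control. The denoiser and dynamics below are placeholders, not the paper's model.

```python
# Schematic closed-loop refine-and-emit control loop (toy 1-D system).
import numpy as np

rng = np.random.default_rng(0)

def denoise_step(controls, obs, t):
    """Stand-in for one conditional denoising update (assumed interface)."""
    target = -0.5 * obs                            # pretend the model steers here
    return controls + 0.3 * (target - controls)    # move controls toward it

def environment(state, u):
    return 0.9 * state + u + rng.normal(0, 0.01)   # toy 1-D dynamics

state, horizon = 5.0, 4
controls = rng.normal(size=horizon)                # noisy future-control buffer
for step in range(6):
    controls = denoise_step(controls, state, step)       # refine with feedback
    u, controls = controls[0], np.append(controls[1:], rng.normal())
    state = environment(state, u)
    print(f"step {step}: u={u:+.3f} state={state:+.3f}")
```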
arXiv:2408.01810 (https://arxiv.org/abs/2408.01810) [cs.SE]
Demystifying Device-specific Compatibility Issues in Android Apps
Authors: Junfeng Chen, Kevin Li, Yifei Chen, Lili Wei, Yepang Liu
Abstract: The Android ecosystem is profoundly fragmented due to frequent updates of the Android system and prevalent customizations by mobile device manufacturers. Previous research primarily focused on identifying and repairing evolution-induced API compatibility issues, with limited consideration of device-specific compatibility issues (DSC issues). To fill this gap, we conduct an empirical study of 197 DSC issues collected from 94 open-source repositories on GitHub. We introduce a new perspective for comprehending these issues by categorizing them into two principal groups, functionality breaks and OEM features, based on their manifestations and root causes. Functionality-break issues disrupt standard Android system behaviors, lead to crashes or unexpected behaviors on specific devices, and require developers to implement workarounds to preserve the original functionality. OEM-feature issues involve the introduction of device-specific functionalities or features beyond the basic Android system. The different nature of the two groups leads to unique challenges in addressing them. Common solutions for functionality-break issues involve calling additional APIs, substituting problematic ones, or using specific parameters, while resolving OEM-feature issues often relies on Android inter-component communication methods and reflection, along with other unconventional strategies. These observations highlight the distinctive challenges in addressing DSC issues in Android apps and will facilitate the future development of testing and analysis tools targeting them. Our study demonstrates that functionality-break and OEM-feature issues have different characteristics, and future research may need to investigate them separately.
Submitted 3 August, 2024; originally announced August 2024.
Comments: Accepted by ICSME'24
arXiv:2408.00293 (https://arxiv.org/abs/2408.00293) [cs.IT]
Gradient Flow Decoding
Authors: Tadashi Wadayama, Lantian Wei
Abstract: This paper presents Gradient Flow (GF) decoding for LDPC codes. GF decoding, a continuous-time methodology based on gradient flow, employs a potential energy function associated with the bipolar codewords of LDPC codes. The decoding process is concisely defined by an ordinary differential equation and is thus well suited to analog circuit implementation. We experimentally demonstrate that the decoding performance of GF decoding for AWGN channels is comparable to that of the multi-bit-mode gradient descent bit flipping algorithm. We further introduce the negative log-likelihood function of the channel to generalize GF decoding. The proposed method is shown to be tensor-computable, meaning that the gradient of the objective function can be evaluated with a combination of basic tensor computations. This characteristic is well suited to emerging AI accelerators and is potentially applicable in wireless signal processing. The paper assesses the decoding performance of generalized GF decoding in LDPC-coded MIMO channels. Our numerical experiments reveal that the decoding performance rivals that of established techniques such as MMSE + BP. An exploration of score-based channel learning for capturing statistical properties is also provided.
Submitted 1 August, 2024; originally announced August 2024.
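The flavor of a gradient-flow decoder can be shown with a few lines of Euler integration: define a potential that rewards agreement with the channel output and satisfied parity checks over bipolar values, then follow dx/dt = -grad E. The tiny code, the exact energy form, the step size, and the clipping are toy assumptions, not the paper's construction.

```python
# Euler integration of a toy gradient-flow decoder over bipolar values.
# Energy: E(x) = -sum_i x_i * y_i - sum_checks prod_{i in check} x_i.
import numpy as np

H = np.array([[1, 1, 0, 1, 0, 0],    # small parity-check matrix (toy code)
              [0, 1, 1, 0, 1, 0],
              [1, 0, 0, 0, 1, 1]])
checks = [np.flatnonzero(row) for row in H]

def neg_grad(x, y):
    g = y.copy()                                   # channel term of -dE/dx
    for c in checks:
        for k in c:
            others = np.array([i for i in c if i != k])
            g[k] += np.prod(x[others])             # leave-one-out check product
    return g

rng = np.random.default_rng(1)
codeword = np.ones(6)                              # all-ones bipolar codeword
y = codeword + rng.normal(0, 0.6, 6)               # AWGN channel output
x = y.copy()
for _ in range(200):                               # explicit Euler on dx/dt = -grad E
    x += 0.05 * neg_grad(x, y)
    x = np.clip(x, -1.5, 1.5)                      # keep the flow bounded
print(np.sign(x))                                  # decoded bipolar word
```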
arXiv:2407.21418 (https://arxiv.org/abs/2407.21418) [cs.LG, cs.DC]
FTuner: A Fast Dynamic Shape Tensors Program Auto-Tuner for Deep Learning Compilers
Authors: Pengyu Mu, Linquan Wei, Yi Liu, Rui Wang
Abstract: Many artificial intelligence models process input data of different lengths and resolutions, making the shapes of their tensors dynamic. The performance of these models depends on tensor shape, which makes it difficult to optimize the tensors before the model runs. There are two common solutions to this problem. The first is to pad the input with useless data to match a pre-optimized tensor library. The second is to use small basic tensors to create a tensor that is closest in size to the input data and then tune it to minimize padding; however, this can be time-consuming. This paper proposes a new technique for deep learning compilers called FTuner. Instead of using a large design space or training a cost model, we use an abstract computational unit called the uKernel to patch together small, variously sized tensors to match the shape of the input tensor. We determine the shape of the uKernel using an analytic hardware information model. Experiments show that FTuner achieves operator and end-to-end performance comparable to vendor libraries and a 3% speedup over an existing auto-tuner that uses a model-training compiler, while reducing tuning time by two orders of magnitude.
Submitted 31 July, 2024; originally announced July 2024.
Comments: 14 pages, 16 figures, 6 tables
MSC Class: 68M20 (Primary)
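The padding-minimization subproblem behind the "small basic tensors" solution can be phrased as a simple dynamic program: cover a dynamic axis with a few fixed tile widths so the padded remainder is minimal. The kernel sizes below are hypothetical, and FTuner itself picks uKernel shapes from a hardware model rather than by this DP.

```python
# DP sketch: least padding needed to tile a dynamic axis with fixed widths.
import functools

KERNEL_SIZES = (16, 24, 32)          # hypothetical basic tensor widths

@functools.lru_cache(maxsize=None)
def min_padding(n):
    """Least padding needed to tile a length-n axis with the kernel sizes."""
    if n == 0:
        return 0
    # option A: finish with one kernel at least as large as the remainder
    best = min(k - n for k in KERNEL_SIZES if k >= n) if n <= max(KERNEL_SIZES) else None
    # option B: place one kernel exactly and recurse on the rest
    options = [min_padding(n - k) for k in KERNEL_SIZES if k <= n]
    candidates = options + ([best] if best is not None else [])
    return min(candidates)

for n in (17, 50, 100):
    print(n, min_padding(n))
```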
arXiv:2407.15399 (https://arxiv.org/abs/2407.15399) [cs.CL, cs.AI, cs.CR]
Imposter.AI: Adversarial Attacks with Hidden Intentions towards Aligned Large Language Models
Authors: Xiao Liu, Liangzhi Li, Tong Xiang, Fuying Ye, Lu Wei, Wangyue Li, Noa Garcia
Abstract: With the development of large language models (LLMs) like ChatGPT, both their vast applications and potential vulnerabilities have come to the forefront. While developers have integrated multiple safety mechanisms to mitigate misuse, a risk remains, particularly when models encounter adversarial inputs. This study unveils an attack mechanism that capitalizes on human conversation strategies to extract harmful information from LLMs. We delineate three pivotal strategies: (i) decomposing malicious questions into seemingly innocent sub-questions; (ii) rewriting overtly malicious questions into more covert, benign-sounding ones; and (iii) enhancing the harmfulness of responses by prompting models for illustrative examples. Unlike conventional methods that target explicit malicious responses, our approach delves deeper into the nature of the information provided in responses. Through experiments conducted on GPT-3.5-turbo, GPT-4, and Llama2, our method demonstrates marked efficacy compared to conventional attack methods. In summary, this work introduces a novel attack method that outperforms previous approaches, raising an important question: how can we discern whether the ultimate intent of a dialogue is malicious?
Submitted 22 July, 2024; originally announced July 2024.
arXiv:2407.13841 (https://arxiv.org/abs/2407.13841) [cs.CV, cs.LG]
Many Perception Tasks are Highly Redundant Functions of their Input Data
Authors: Rahul Ramesh, Anthony Bisulco, Ronald W. DiTullio, Linran Wei, Vijay Balasubramanian, Kostas Daniilidis, Pratik Chaudhari
Abstract: We show that many perception tasks, from visual recognition, semantic segmentation, optical flow, and depth estimation to vocalization discrimination, are highly redundant functions of their input data. Images or spectrograms, projected into different subspaces formed by orthogonal bases in the pixel, Fourier, or wavelet domains, can be used to solve these tasks remarkably well, regardless of whether it is the top subspace where the data vary the most, some intermediate subspace with moderate variability, or the bottom subspace where the data vary the least. This phenomenon occurs because different subspaces carry a large degree of redundant information relevant to the task.
Submitted 18 July, 2024; originally announced July 2024.
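A tiny synthetic demonstration of the setup: project inputs onto disjoint orthogonal subspaces and solve the task in each. Here a random orthonormal basis, Gaussian two-class data, and a nearest-centroid rule stand in for the paper's tasks and models; the point is that both subspaces support the task when the signal is spread across many directions.

```python
# Solve a toy classification task in two disjoint orthogonal subspaces.
import numpy as np

rng = np.random.default_rng(0)
d = 64
Q, _ = np.linalg.qr(rng.normal(size=(d, d)))       # random orthonormal basis

mu = rng.normal(size=d)                            # class shift along many directions
X0 = rng.normal(size=(100, d))
X1 = rng.normal(size=(100, d)) + mu

def accuracy(sub):                                 # nearest-centroid in a subspace
    P0, P1 = X0 @ sub, X1 @ sub
    c0, c1 = P0.mean(0), P1.mean(0)
    correct = (np.linalg.norm(P0 - c0, axis=1) < np.linalg.norm(P0 - c1, axis=1)).sum()
    correct += (np.linalg.norm(P1 - c1, axis=1) < np.linalg.norm(P1 - c0, axis=1)).sum()
    return correct / 200

print("first 8 dims:", accuracy(Q[:, :8]), "last 8 dims:", accuracy(Q[:, -8:]))
```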
To this end, we propose a novel model named Multi-Source Knowledge-enhanced Graph Attention Network (MultiKE-GAT). MultiKE-GAT introduces external multimodal knowledge from different sources and constructs a heterogeneous graph to capture complex cross-modal and cross-source interactions. We exploit a Knowledge-aware Graph Fusion (KGF) module to learn knowledge-enhanced representations for each claim and its evidence, and to eliminate inconsistencies and noise introduced by redundant entities. Experiments on two public benchmark datasets demonstrate that our model outperforms competing methods, confirming its effectiveness and superiority. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ICME 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.09894">arXiv:2407.09894</a> <span> [<a href="https://arxiv.org/pdf/2407.09894">pdf</a>, <a href="https://arxiv.org/format/2407.09894">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Transferring Structure Knowledge: A New Task to Fake news Detection Towards Cold-Start Propagation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wei%2C+L">Lingwei Wei</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+D">Dou Hu</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+W">Wei Zhou</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+S">Songlin Hu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.09894v1-abstract-short" style="display: inline;"> Many fake news detection studies have achieved promising performance by extracting effective semantic and structure features from both content and propagation trees. However, it is challenging to apply them to practical situations, especially when using the trained propagation-based models to detect news with no propagation data. Towards this scenario, we study a new task named cold-start fake new… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.09894v1-abstract-full').style.display = 'inline'; document.getElementById('2407.09894v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.09894v1-abstract-full" style="display: none;"> Many fake news detection studies have achieved promising performance by extracting effective semantic and structure features from both content and propagation trees. However, it is challenging to apply them to practical situations, especially when using the trained propagation-based models to detect news with no propagation data. Towards this scenario, we study a new task named cold-start fake news detection, which aims to detect content-only samples with missing propagation. To achieve the task, we design a simple but effective Structure Adversarial Net (SAN) framework to learn transferable features from available propagation to boost the detection of content-only samples. SAN introduces a structure discriminator to estimate dissimilarities among learned features with and without propagation, and further learns structure-invariant features to enhance the generalization of existing propagation-based methods for content-only samples. We conduct qualitative and quantitative experiments on three datasets. Results show the challenge of the new task and the effectiveness of our SAN framework. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.09894v1-abstract-full').style.display = 'none'; document.getElementById('2407.09894v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICASSP 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.08961">arXiv:2407.08961</a> <span> [<a href="https://arxiv.org/pdf/2407.08961">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Tissue-Contrastive Semi-Masked Autoencoders for Segmentation Pretraining on Chest CT </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zheng%2C+J">Jie Zheng</a>, <a href="/search/cs?searchtype=author&query=Wen%2C+R">Ru Wen</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+H">Haiqin Hu</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+L">Lina Wei</a>, <a href="/search/cs?searchtype=author&query=Su%2C+K">Kui Su</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+W">Wei Chen</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+C">Chen Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jun Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.08961v1-abstract-short" style="display: inline;"> Existing Masked Image Modeling (MIM) depends on a spatial patch-based masking-reconstruction strategy to perceive objects'features from unlabeled images, which may face two limitations when applied to chest CT: 1) inefficient feature learning due to complex anatomical details presented in CT images, and 2) suboptimal knowledge transfer owing to input disparity between upstream and downstream model… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.08961v1-abstract-full').style.display = 'inline'; document.getElementById('2407.08961v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.08961v1-abstract-full" style="display: none;"> Existing Masked Image Modeling (MIM) depends on a spatial patch-based masking-reconstruction strategy to perceive objects'features from unlabeled images, which may face two limitations when applied to chest CT: 1) inefficient feature learning due to complex anatomical details presented in CT images, and 2) suboptimal knowledge transfer owing to input disparity between upstream and downstream models. To address these issues, we propose a new MIM method named Tissue-Contrastive Semi-Masked Autoencoder (TCS-MAE) for modeling chest CT images. 
Our method has two novel designs: 1) a tissue-based masking-reconstruction strategy to capture more fine-grained anatomical features, and 2) a dual-AE architecture with contrastive learning between the masked and original image views to bridge the gap between the upstream and downstream models. To validate our method, we systematically investigate representative contrastive, generative, and hybrid self-supervised learning methods on tasks involving the segmentation of pneumonia, mediastinal tumors, and various organs. The results demonstrate that, compared to existing methods, our TCS-MAE more effectively learns tissue-aware representations, thereby significantly enhancing segmentation performance across all tasks. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.07589">arXiv:2407.07589</a> <span> [<a href="https://arxiv.org/pdf/2407.07589">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> MSC-LIO: An MSCKF-Based LiDAR-Inertial Odometry with Same-Plane-Point Tracking </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+T">Tisheng Zhang</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+M">Man Yuan</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+L">Linfu Wei</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+H">Hailiang Tang</a>, <a href="/search/cs?searchtype=author&query=Niu%2C+X">Xiaoji Niu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2407.07589v2-abstract-full" style="display: inline;"> The multi-state constraint Kalman filter (MSCKF) has been proven to be more efficient than graph optimization for visual-based odometry while achieving similar accuracy. However, it has not yet been properly considered and studied for LiDAR-based odometry. In this paper, we propose a novel tightly coupled LiDAR-inertial odometry based on the MSCKF framework, named MSC-LIO.
An efficient LiDAR same-plane-point (LSPP) tracking method, which requires no explicit feature extraction, is presented for frame-to-frame data association. The tracked LSPPs are employed to build an LSPP measurement model, which constructs a multi-state constraint. In addition, we propose an effective point-velocity-based LiDAR-IMU time-delay (LITD) estimation method, which is derived from the proposed LSPP tracking method. Extensive experiments were conducted on both public and private datasets. The results demonstrate that the proposed MSC-LIO yields higher accuracy and efficiency than state-of-the-art methods. The ablation results indicate that the LSPP tracking method improves data-association efficiency nearly threefold. Moreover, the proposed method estimates the LITD effectively and accurately. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.06494">arXiv:2407.06494</a> <span> [<a href="https://arxiv.org/pdf/2407.06494">pdf</a>, <a href="https://arxiv.org/format/2407.06494">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> DiffPhyCon: A Generative Approach to Control Complex Physical Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wei%2C+L">Long Wei</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+P">Peiyan Hu</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+R">Ruiqi Feng</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+H">Haodong Feng</a>, <a href="/search/cs?searchtype=author&query=Du%2C+Y">Yixuan Du</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+T">Tao Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+R">Rui Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yue Wang</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+Z">Zhi-Ming Ma</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+T">Tailin Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2407.06494v4-abstract-full" style="display: inline;"> Controlling the evolution of complex physical systems is a fundamental task across science and engineering. Classical techniques suffer from limited applicability or huge computational costs.
On the other hand, recent deep learning and reinforcement learning-based approaches often struggle to optimize long-term control sequences under the constraints of system dynamics. In this work, we introduce Diffusion Physical systems Control (DiffPhyCon), a new class of methods for the physical systems control problem. DiffPhyCon excels by simultaneously minimizing both the learned generative energy function and the predefined control objectives across the entire trajectory and control sequence. Thus, it can explore globally and plan near-optimal control sequences. Moreover, we enhance DiffPhyCon with prior reweighting, enabling the discovery of control sequences that significantly deviate from the training distribution. We test our method on three tasks: 1D Burgers' equation, 2D jellyfish movement control, and 2D high-dimensional smoke control, where our generated jellyfish dataset is released as a benchmark for complex physical system control research. Our method outperforms widely applied classical approaches and state-of-the-art deep learning and reinforcement learning methods. Notably, DiffPhyCon unveils an intriguing fast-close-slow-open pattern in the jellyfish, aligning with established findings in the field of fluid dynamics. The project website, jellyfish dataset, and code can be found at https://github.com/AI4Science-WestlakeU/diffphycon. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2024 poster.
51 pages, 19 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.05610">arXiv:2407.05610</a> <span> [<a href="https://arxiv.org/pdf/2407.05610">pdf</a>, <a href="https://arxiv.org/format/2407.05610">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Described Spatial-Temporal Video Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ji%2C+W">Wei Ji</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xiangyan Liu</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Y">Yingfei Sun</a>, <a href="/search/cs?searchtype=author&query=Deng%2C+J">Jiajun Deng</a>, <a href="/search/cs?searchtype=author&query=Qin%2C+Y">You Qin</a>, <a href="/search/cs?searchtype=author&query=Nuwanna%2C+A">Ammar Nuwanna</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Mengyao Qiu</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+L">Lina Wei</a>, <a href="/search/cs?searchtype=author&query=Zimmermann%2C+R">Roger Zimmermann</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2407.05610v1-abstract-full" style="display: inline;"> Detecting visual content based on language expressions has become an emerging topic in the community. However, in the video domain, the existing setting, i.e., spatial-temporal video grounding (STVG), is formulated to detect only one pre-existing object in each frame, ignoring the fact that language descriptions can involve none or multiple entities within a video. In this work, we advance the STVG to a more practical setting called described spatial-temporal video detection (DSTVD) by overcoming the above limitation. To facilitate the exploration of DSTVD, we first introduce a new benchmark, namely DVD-ST. Notably, DVD-ST supports grounding from none to many objects onto the video in response to queries and encompasses a diverse range of over 150 entities, including appearance, actions, locations, and interactions. The extensive breadth and diversity of the DVD-ST dataset make it an exemplary testbed for the investigation of DSTVD. In addition to the new benchmark, we further present two baseline methods for our proposed DSTVD task by extending two representative STVG models, i.e., TubeDETR and STCAT. These extended models capitalize on tubelet queries to localize and track referred objects across the video sequence.
In addition, we adjust the training objectives of these models to optimize spatial and temporal localization accuracy and multi-class classification capabilities. Furthermore, we benchmark the baselines on the introduced DVD-ST dataset and conduct extensive experimental analysis to guide future investigation. Our code and benchmark will be publicly available. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.00431">arXiv:2407.00431</a> <span> [<a href="https://arxiv.org/pdf/2407.00431">pdf</a>, <a href="https://arxiv.org/format/2407.00431">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Location embedding based pairwise distance learning for fine-grained diagnosis of urinary stones </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jin%2C+Q">Qiangguo Jin</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+J">Jiapeng Huang</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+C">Changming Sun</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+H">Hui Cui</a>, <a href="/search/cs?searchtype=author&query=Xuan%2C+P">Ping Xuan</a>, <a href="/search/cs?searchtype=author&query=Su%2C+R">Ran Su</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+L">Leyi Wei</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yu-Jie Wu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+C">Chia-An Wu</a>, <a href="/search/cs?searchtype=author&query=Duh%2C+H+B+L">Henry B. L. Duh</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+Y">Yueh-Hsun Lu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2407.00431v1-abstract-full" style="display: inline;"> The precise diagnosis of urinary stones is crucial for devising effective treatment strategies. The diagnostic process, however, is often complicated by the low contrast between stones and surrounding tissues, as well as the variability in stone locations across different patients.
To address this issue, we propose a novel location embedding based pairwise distance learning network (LEPD-Net) that leverages low-dose abdominal X-ray imaging combined with location information for the fine-grained diagnosis of urinary stones. LEPD-Net enhances the representation of stone-related features through context-aware region enhancement, incorporates critical location knowledge via stone location embedding, and achieves recognition of fine-grained objects with our innovative fine-grained pairwise distance learning. Additionally, we have established an in-house dataset on urinary tract stones to demonstrate the effectiveness of our proposed approach. Comprehensive experiments conducted on this dataset reveal that our framework significantly surpasses existing state-of-the-art methods. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> MICCAI 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.07979">arXiv:2406.07979</a> <span> [<a href="https://arxiv.org/pdf/2406.07979">pdf</a>, <a href="https://arxiv.org/format/2406.07979">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Heuristic Learning with Graph Neural Networks: A Unified Framework for Link Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Juzheng Zhang</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+L">Lanning Wei</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Z">Zhen Xu</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+Q">Quanming Yao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2406.07979v2-abstract-full" style="display: inline;"> Link prediction is a fundamental task in graph learning, inherently shaped by the topology of the graph. While traditional heuristics are grounded in graph topology, they encounter challenges in generalizing across diverse graphs.
Recent research efforts have aimed to leverage the potential of heuristics, yet a unified formulation accommodating both local and global heuristics has remained elusive. Drawing insights from the fact that both local and global heuristics can be represented by adjacency matrix multiplications, we propose a unified matrix formulation to accommodate and generalize various heuristics. We further propose the Heuristic Learning Graph Neural Network (HL-GNN) to efficiently implement the formulation. HL-GNN adopts intra-layer propagation and inter-layer connections, allowing it to reach a depth of around 20 layers with lower time complexity than GCN. Extensive experiments on the Planetoid, Amazon, and OGB datasets underscore the effectiveness and efficiency of HL-GNN. It outperforms existing methods by a large margin in prediction performance. Additionally, HL-GNN is several orders of magnitude faster than heuristic-inspired methods while requiring only a few trainable parameters. A case study further demonstrates that the generalized heuristics and learned weights are highly interpretable. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by KDD 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.06140">arXiv:2406.06140</a> <span> [<a href="https://arxiv.org/pdf/2406.06140">pdf</a>, <a href="https://arxiv.org/format/2406.06140">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Can I understand what I create?
Self-Knowledge Evaluation of Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tan%2C+Z">Zhiquan Tan</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+L">Lai Wei</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jindong Wang</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+X">Xing Xie</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+W">Weiran Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2406.06140v1-abstract-full" style="display: inline;"> Large language models (LLMs) have achieved remarkable progress in linguistic tasks, necessitating robust evaluation frameworks to understand their capabilities and limitations. Inspired by Feynman's principle of understanding through creation, we introduce a self-knowledge evaluation framework that is easy to implement, evaluating models on their ability to comprehend and respond to self-generated questions. Our findings, based on testing multiple models across diverse tasks, reveal significant gaps in the models' self-knowledge ability. Further analysis indicates that these gaps may be due to misalignment with human attention mechanisms. Additionally, fine-tuning on self-generated math tasks may enhance the model's math performance, highlighting the framework's potential for efficient and insightful model evaluation and for improving LLMs. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p>
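<p class="is-size-7">A minimal sketch of the evaluate-what-you-create loop described above, assuming only a hypothetical chat(prompt) wrapper around an LLM; the delimiter protocol and the naive substring-match scorer are illustrative assumptions, not the paper's exact setup.</p> <pre>
# Hedged sketch of a self-knowledge evaluation loop.
def self_knowledge_score(chat, topic, n=10):
    hits = 0
    for _ in range(n):
        gen = chat(f"Write one question about {topic} and its answer, separated by '|||'.")
        question, _, reference = gen.partition("|||")
        prediction = chat(question.strip())
        # Naive consistency check: does the model's answer contain its own reference?
        hits += int(reference.strip().lower() in prediction.lower())
    return hits / n  # fraction of self-generated questions answered consistently
</pre>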
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.05510">arXiv:2406.05510</a> <span> [<a href="https://arxiv.org/pdf/2406.05510">pdf</a>, <a href="https://arxiv.org/format/2406.05510">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Representation Learning with Conditional Information Flow Maximization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hu%2C+D">Dou Hu</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+L">Lingwei Wei</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+W">Wei Zhou</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+S">Songlin Hu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.05510v2-abstract-short" style="display: inline;"> This paper proposes an information-theoretic representation learning framework, named conditional information flow maximization, to extract noise-invariant sufficient representations for the input data and target task. It promotes the learned representations have good feature uniformity and sufficient predictive ability, which can enhance the generalization of pre-trained language models (PLMs) fo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.05510v2-abstract-full').style.display = 'inline'; document.getElementById('2406.05510v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.05510v2-abstract-full" style="display: none;"> This paper proposes an information-theoretic representation learning framework, named conditional information flow maximization, to extract noise-invariant sufficient representations for the input data and target task. It promotes the learned representations have good feature uniformity and sufficient predictive ability, which can enhance the generalization of pre-trained language models (PLMs) for the target task. Firstly, an information flow maximization principle is proposed to learn more sufficient representations for the input and target by simultaneously maximizing both input-representation and representation-label mutual information. Unlike the information bottleneck, we handle the input-representation information in an opposite way to avoid the over-compression issue of latent representations. Besides, to mitigate the negative effect of potential redundant features from the input, we design a conditional information minimization principle to eliminate negative redundant features while preserve noise-invariant features. Experiments on 13 language understanding benchmarks demonstrate that our method effectively improves the performance of PLMs for classification and regression. Extensive experiments show that the learned representations are more sufficient, robust and transferable. 
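<p class="is-size-7">The two principles above can be pictured with standard mutual-information surrogates: a task loss keeps representation-label information high while a contrastive InfoNCE term keeps input-representation information high. The sketch below is a hedged illustration with assumed loss weights, not the released CIFM code (linked in the comments that follow).</p> <pre>
# Hedged sketch of the information-flow idea with common surrogate estimators.
import torch
import torch.nn.functional as F

def info_nce(z, z_view, temperature=0.1):
    z, z_view = F.normalize(z, dim=-1), F.normalize(z_view, dim=-1)
    logits = z @ z_view.t() / temperature             # pairwise similarities
    targets = torch.arange(z.size(0), device=z.device)  # matched pairs on the diagonal
    return F.cross_entropy(logits, targets)

def information_flow_loss(logits, labels, z, z_input_view, alpha=0.5):
    task = F.cross_entropy(logits, labels)  # proxy for representation-label MI
    flow = info_nce(z, z_input_view)        # proxy for input-representation MI
    return task + alpha * flow
</pre>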
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.05510v2-abstract-full').style.display = 'none'; document.getElementById('2406.05510v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, accepted to ACL 2024 (main conference), the code is available at https://github.com/zerohd4869/CIFM</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.04675">arXiv:2406.04675</a> <span> [<a href="https://arxiv.org/pdf/2406.04675">pdf</a>, <a href="https://arxiv.org/format/2406.04675">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> OVMR: Open-Vocabulary Recognition with Multi-Modal References </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ma%2C+Z">Zehong Ma</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Shiliang Zhang</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+L">Longhui Wei</a>, <a href="/search/cs?searchtype=author&query=Tian%2C+Q">Qi Tian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.04675v1-abstract-short" style="display: inline;"> The challenge of open-vocabulary recognition lies in the model has no clue of new categories it is applied to. Existing works have proposed different methods to embed category cues into the model, \eg, through few-shot fine-tuning, providing category names or textual descriptions to Vision-Language Models. Fine-tuning is time-consuming and degrades the generalization capability. Textual descriptio… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.04675v1-abstract-full').style.display = 'inline'; document.getElementById('2406.04675v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.04675v1-abstract-full" style="display: none;"> The challenge of open-vocabulary recognition lies in the model has no clue of new categories it is applied to. Existing works have proposed different methods to embed category cues into the model, \eg, through few-shot fine-tuning, providing category names or textual descriptions to Vision-Language Models. Fine-tuning is time-consuming and degrades the generalization capability. Textual descriptions could be ambiguous and fail to depict visual details. This paper tackles open-vocabulary recognition from a different perspective by referring to multi-modal clues composed of textual descriptions and exemplar images. Our method, named OVMR, adopts two innovative components to pursue a more robust category cues embedding. A multi-modal classifier is first generated by dynamically complementing textual descriptions with image exemplars. 
A preference-based refinement module is then applied to fuse uni-modal and multi-modal classifiers, with the aim of alleviating issues caused by low-quality exemplar images or textual descriptions. The proposed OVMR is a plug-and-play module and works well with exemplar images randomly crawled from the Internet. Extensive experiments have demonstrated the promising performance of OVMR; e.g., it outperforms existing methods across various scenarios and setups. Code is publicly available at https://github.com/Zehong-Ma/OVMR. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">CVPR2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.11548">arXiv:2405.11548</a> <span> [<a href="https://arxiv.org/pdf/2405.11548">pdf</a>, <a href="https://arxiv.org/format/2405.11548">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> </div> </div> <p class="title is-5 mathjax"> Adaptive Online Experimental Design for Causal Discovery </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Elahi%2C+M+Q">Muhammad Qasim Elahi</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+L">Lai Wei</a>, <a href="/search/cs?searchtype=author&query=Kocaoglu%2C+M">Murat Kocaoglu</a>, <a href="/search/cs?searchtype=author&query=Ghasemi%2C+M">Mahsa Ghasemi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2405.11548v3-abstract-full" style="display: inline;"> Causal discovery aims to uncover cause-and-effect relationships encoded in causal graphs by leveraging observational data, interventional data, or their combination. The majority of existing causal discovery methods are developed assuming infinite interventional data.
We focus on interventional data efficiency and formalize causal discovery from the perspective of online learning, inspired by pure exploration in bandit problems. A graph separating system, consisting of interventions that cut every edge of the graph at least once, is sufficient for learning causal graphs when infinite interventional data is available, even in the worst case. We propose a track-and-stop causal discovery algorithm that adaptively selects interventions from the graph separating system via allocation matching and learns the causal graph based on the sampling history. Given any desired confidence value, the algorithm determines a termination condition and runs until it is met. We analyze the algorithm to establish a problem-dependent upper bound on the expected number of required interventional samples. Our proposed algorithm outperforms existing methods in simulations across various randomly generated causal graphs. It achieves higher accuracy, measured by the structural Hamming distance (SHD) between the learned causal graph and the ground truth, with significantly fewer samples. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear in Proceedings of ICML 24</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.10496">arXiv:2405.10496</a> <span> [<a href="https://arxiv.org/pdf/2405.10496">pdf</a>, <a href="https://arxiv.org/format/2405.10496">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Electromagnetic Information Theory for Holographic MIMO Communications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wei%2C+L">Li Wei</a>, <a href="/search/cs?searchtype=author&query=Gong%2C+T">Tierui Gong</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+C">Chongwen Huang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhaoyang Zhang</a>, <a href="/search/cs?searchtype=author&query=Sha%2C+W+E+I">Wei E. I.
Sha</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z+N">Zhi Ning Chen</a>, <a href="/search/cs?searchtype=author&query=Dai%2C+L">Linglong Dai</a>, <a href="/search/cs?searchtype=author&query=Debbah%2C+M">Merouane Debbah</a>, <a href="/search/cs?searchtype=author&query=Yuen%2C+C">Chau Yuen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2405.10496v3-abstract-full" style="display: inline;"> Holographic multiple-input multiple-output (HMIMO) utilizes a compact antenna array to form a nearly continuous aperture, thereby offering higher capacity and more flexible configurations than conventional MIMO systems and making it attractive in current scientific research. Key questions naturally arise regarding the potential of HMIMO to surpass Shannon's theoretical limits and how far its capabilities can be extended. However, traditional Shannon information theory falls short in addressing these inquiries because it focuses only on the information itself while neglecting the underlying carrier, electromagnetic (EM) waves, and environmental interactions. To fill the gap between theoretical analysis and practical application for HMIMO systems, we introduce electromagnetic information theory (EIT) in this paper. This paper begins by laying the foundation for HMIMO-oriented EIT, encompassing EM wave equations and communication regions. In the context of HMIMO systems, the resultant physical limitations are presented, involving Chu's limit, Harrington's limit, Hannan's limit, and the evaluation of coupling effects. Field sampling and HMIMO-assisted oversampling are also discussed to guide optimal HMIMO design within the EIT framework. To comprehensively depict the EM-compliant propagation process, we present approximate and exact channel modeling approaches for the near-/far-field zones. Furthermore, we discuss both traditional Shannon information theory, employing probabilistic methods, and Kolmogorov information theory, utilizing functional analysis, for HMIMO-oriented EIT systems. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p>
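<p class="is-size-7">As a concrete reference point for the Shannon-limit discussion above, the conventional MIMO capacity that EIT aims to look beyond is straightforward to compute. The sketch below evaluates C = log2 det(I + (snr/n_t) H H^H) for one random channel draw; the Rayleigh channel model and equal power allocation are illustrative assumptions.</p> <pre>
# Conventional Shannon capacity of an n_r x n_t MIMO channel, equal power split.
import numpy as np

def mimo_capacity(H, snr):
    n_r, n_t = H.shape
    gram = H @ H.conj().T
    return np.log2(np.linalg.det(np.eye(n_r) + (snr / n_t) * gram)).real

H = (np.random.randn(4, 4) + 1j * np.random.randn(4, 4)) / np.sqrt(2)
print(mimo_capacity(H, snr=10.0))  # capacity in bits/s/Hz for this draw
</pre>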
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.08322">arXiv:2405.08322</a> <span> [<a href="https://arxiv.org/pdf/2405.08322">pdf</a>, <a href="https://arxiv.org/format/2405.08322">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> StraightPCF: Straight Point Cloud Filtering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Edirimuni%2C+D+d+S">Dasith de Silva Edirimuni</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+X">Xuequan Lu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+G">Gang Li</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+L">Lei Wei</a>, <a href="/search/cs?searchtype=author&query=Robles-Kelly%2C+A">Antonio Robles-Kelly</a>, <a href="/search/cs?searchtype=author&query=Li%2C+H">Hongdong Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.08322v1-abstract-short" style="display: inline;"> Point cloud filtering is a fundamental 3D vision task, which aims to remove noise while recovering the underlying clean surfaces. State-of-the-art methods remove noise by moving noisy points along stochastic trajectories to the clean surfaces. These methods often require regularization within the training objective and/or during post-processing, to ensure fidelity. In this paper, we introduce Stra… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.08322v1-abstract-full').style.display = 'inline'; document.getElementById('2405.08322v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.08322v1-abstract-full" style="display: none;"> Point cloud filtering is a fundamental 3D vision task, which aims to remove noise while recovering the underlying clean surfaces. State-of-the-art methods remove noise by moving noisy points along stochastic trajectories to the clean surfaces. These methods often require regularization within the training objective and/or during post-processing, to ensure fidelity. In this paper, we introduce StraightPCF, a new deep learning based method for point cloud filtering. It works by moving noisy points along straight paths, thus reducing discretization errors while ensuring faster convergence to the clean surfaces. We model noisy patches as intermediate states between high noise patch variants and their clean counterparts, and design the VelocityModule to infer a constant flow velocity from the former to the latter. This constant flow leads to straight filtering trajectories. In addition, we introduce a DistanceModule that scales the straight trajectory using an estimated distance scalar to attain convergence near the clean surface. Our network is lightweight and only has $\sim530K$ parameters, being 17% of IterativePFN (a most recent point cloud filtering network). Extensive experiments on both synthetic and real-world data show our method achieves state-of-the-art results. Our method also demonstrates nice distributions of filtered points without the need for regularization. The implementation code can be found at: https://github.com/ddsediri/StraightPCF. 
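<p class="is-size-7">The straight-trajectory idea above reduces filtering to a single constant-direction move per point. Below is a toy sketch, not the released code; velocity_net and distance_net are hypothetical stand-ins for the paper's VelocityModule and DistanceModule.</p> <pre>
# Toy illustration of straight-path filtering: each noisy point moves once along
# a unit flow direction, scaled by an estimated distance to the clean surface.
import torch

def straight_filter_step(points, velocity_net, distance_net):
    v = velocity_net(points)                       # constant flow direction
    v = v / (v.norm(dim=-1, keepdim=True) + 1e-8)  # normalize to unit length
    d = distance_net(points)                       # per-point distance scalar
    return points + d * v                          # one straight move, no regularizer
</pre>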
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.08322v1-abstract-full').style.display = 'none'; document.getElementById('2405.08322v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper has been accepted to the IEEE/CVF CVPR Conference, 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.07065">arXiv:2405.07065</a> <span> [<a href="https://arxiv.org/pdf/2405.07065">pdf</a>, <a href="https://arxiv.org/format/2405.07065">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> LogoMotion: Visually Grounded Code Generation for Content-Aware Animation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+V">Vivian Liu</a>, <a href="/search/cs?searchtype=author&query=Kazi%2C+R+H">Rubaiat Habib Kazi</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+L">Li-Yi Wei</a>, <a href="/search/cs?searchtype=author&query=Fisher%2C+M">Matthew Fisher</a>, <a href="/search/cs?searchtype=author&query=Langlois%2C+T">Timothy Langlois</a>, <a href="/search/cs?searchtype=author&query=Walker%2C+S">Seth Walker</a>, <a href="/search/cs?searchtype=author&query=Chilton%2C+L">Lydia Chilton</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.07065v1-abstract-short" style="display: inline;"> Animated logos are a compelling and ubiquitous way individuals and brands represent themselves online. Manually authoring these logos can require significant artistic skill and effort. To help novice designers animate logos, design tools currently offer templates and animation presets. However, these solutions can be limited in their expressive range. Large language models have the potential to he… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.07065v1-abstract-full').style.display = 'inline'; document.getElementById('2405.07065v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.07065v1-abstract-full" style="display: none;"> Animated logos are a compelling and ubiquitous way individuals and brands represent themselves online. Manually authoring these logos can require significant artistic skill and effort. To help novice designers animate logos, design tools currently offer templates and animation presets. However, these solutions can be limited in their expressive range. Large language models have the potential to help novice designers create animated logos by generating animation code that is tailored to their content. In this paper, we introduce LogoMotion, an LLM-based system that takes in a layered document and generates animated logos through visually-grounded program synthesis. 
arXiv:2404.19750 — https://arxiv.org/abs/2404.19750 (pdf: https://arxiv.org/pdf/2404.19750)
Categories: cs.IT (Information Theory), eess.SP (Signal Processing)
Title: A Joint Communication and Computation Design for Distributed RISs Assisted Probabilistic Semantic Communication in IIoT
Authors: Zhouxiang Zhao, Zhaohui Yang, Chongwen Huang, Li Wei, Qianqian Yang, Caijun Zhong, Wei Xu, Zhaoyang Zhang
Abstract: In this paper, the problem of spectrally efficient communication and computation resource allocation for distributed reconfigurable intelligent surface (RIS) assisted probabilistic semantic communication (PSC) in the industrial Internet-of-Things (IIoT) is investigated. In the considered model, multiple RISs are deployed to serve multiple users, while PSC adopts a compute-then-transmit protocol to reduce the transmission data size. To support high-rate transmission, the semantic compression ratio, transmit power allocation, and distributed RIS deployment must be jointly considered. This joint communication and computation problem is formulated as an optimization problem whose goal is to maximize the sum semantic-aware transmission rate of the system under total transmit power, phase shift, RIS-user association, and semantic compression ratio constraints. To solve it, a many-to-many matching scheme is proposed for the RIS-user association subproblem, the semantic compression ratio subproblem is addressed with a greedy policy, and the RIS phase shifts are optimized via tensor-based beamforming. Numerical results verify the superiority of the proposed algorithm.
Submitted: 30 April, 2024; originally announced April 2024.
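As a toy illustration of the compression-ratio subproblem only, a greedy per-user choice over a discrete ratio set might look like the following. The rate and cost functions are invented for illustration and are not the paper's formulation; they merely capture the trade-off that compressing harder shrinks the payload but costs computation before transmission.

    import numpy as np

    ratios = np.array([0.2, 0.4, 0.6, 0.8, 1.0])   # candidate compression ratios

    def semantic_rate(channel_rate, rho, compute_per_bit=0.3):
        # Toy objective: smaller rho boosts the effective rate but the
        # pre-transmission compute penalty grows faster.
        return channel_rate / rho - compute_per_bit / rho**2

    channel_rates = np.array([1.0, 2.5, 0.7])      # per-user link rates (toy)
    for u, r in enumerate(channel_rates):
        best = max(ratios, key=lambda rho: semantic_rate(r, rho))
        print(f"user {u}: ratio {best}, rate {semantic_rate(r, best):.2f}")

Note the greedy choice differs per user: a stronger link can afford more aggressive compression, which is exactly why the ratio must be allocated jointly with power and RIS association rather than fixed globally.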
arXiv:2404.12509 — https://arxiv.org/abs/2404.12509 (pdf: https://arxiv.org/pdf/2404.12509)
Categories: cs.GR (Graphics), cs.AI (Artificial Intelligence), cs.CV (Computer Vision and Pattern Recognition), cs.LG (Machine Learning)
DOI: https://doi.org/10.1145/3680528.3687561
Title: Compositional Neural Textures
Authors: Peihan Tu, Li-Yi Wei, Matthias Zwicker
Abstract: Texture plays a vital role in enhancing visual richness in both real photographs and computer-generated imagery. However, the process of editing textures often involves laborious and repetitive manual adjustments of textons, the recurring local patterns that characterize textures. This work introduces a fully unsupervised approach for representing textures using a compositional neural model that captures individual textons. We represent each texton as a 2D Gaussian function whose spatial support approximates its shape, together with an associated feature that encodes its detailed appearance. By modeling a texture as a discrete composition of Gaussian textons, the representation offers both expressiveness and ease of editing. Textures can be edited by modifying the compositional Gaussians within the latent space, and new textures can be efficiently synthesized by feeding the modified Gaussians through a generator network in a feed-forward manner. This approach enables a wide range of applications, including transferring appearance from an image texture to another image, diversifying textures, texture interpolation, revealing/modifying texture variations, edit propagation, texture animation, and direct texton manipulation. The proposed approach advances texture analysis, modeling, and editing techniques, and opens up new possibilities for creating visually appealing images with controllable textures.
Submitted: 22 September, 2024; v1 submitted 18 April, 2024; originally announced April 2024.
Comments: Project page: https://phtu-cs.github.io/cnt-siga24/
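A minimal sketch of the representation, under two simplifying assumptions not made by the paper: the Gaussians here are isotropic and the "feature" is a single scalar, whereas the actual model uses anisotropic supports, learned appearance features, and a generator network for synthesis.

    import numpy as np

    H = W = 64
    ys, xs = np.mgrid[0:H, 0:W]

    def splat(canvas, mu, sigma, value):
        # Accumulate one Gaussian "texton": spatial support times a scalar
        # appearance value (a learned feature vector in the real model).
        w = np.exp(-((xs - mu[0])**2 + (ys - mu[1])**2) / (2 * sigma**2))
        return canvas + w * value

    canvas = np.zeros((H, W))
    rng = np.random.default_rng(0)
    for _ in range(40):
        mu = rng.uniform(0, 64, size=2)
        canvas = splat(canvas, mu, sigma=3.0, value=rng.uniform(0.5, 1.0))
    print(canvas.shape, float(canvas.max()) > 0)   # (64, 64) True

Editing then amounts to moving, scaling, or re-weighting individual Gaussians before re-rendering, which is what makes the discrete composition easy to manipulate.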
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project page: https://phtu-cs.github.io/cnt-siga24/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.06563">arXiv:2404.06563</a> <span> [<a href="https://arxiv.org/pdf/2404.06563">pdf</a>, <a href="https://arxiv.org/format/2404.06563">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> </div> </div> <p class="title is-5 mathjax"> Demonstration of MaskSearch: Efficiently Querying Image Masks for Machine Learning Workflows </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wei%2C+L+L">Lindsey Linxi Wei</a>, <a href="/search/cs?searchtype=author&query=Yeung%2C+C+Y+E">Chung Yik Edward Yeung</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+H">Hongjian Yu</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+J">Jingchuan Zhou</a>, <a href="/search/cs?searchtype=author&query=He%2C+D">Dong He</a>, <a href="/search/cs?searchtype=author&query=Balazinska%2C+M">Magdalena Balazinska</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.06563v1-abstract-short" style="display: inline;"> We demonstrate MaskSearch, a system designed to accelerate queries over databases of image masks generated by machine learning models. MaskSearch formalizes and accelerates a new category of queries for retrieving images and their corresponding masks based on mask properties, which support various applications, from identifying spurious correlations learned by models to exploring discrepancies bet… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.06563v1-abstract-full').style.display = 'inline'; document.getElementById('2404.06563v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.06563v1-abstract-full" style="display: none;"> We demonstrate MaskSearch, a system designed to accelerate queries over databases of image masks generated by machine learning models. MaskSearch formalizes and accelerates a new category of queries for retrieving images and their corresponding masks based on mask properties, which support various applications, from identifying spurious correlations learned by models to exploring discrepancies between model saliency and human attention. This demonstration makes the following contributions:(1) the introduction of MaskSearch's graphical user interface (GUI), which enables interactive exploration of image databases through mask properties, (2) hands-on opportunities for users to explore MaskSearch's capabilities and constraints within machine learning workflows, and (3) an opportunity for conference attendees to understand how MaskSearch accelerates queries over image masks. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.06563v1-abstract-full').style.display = 'none'; document.getElementById('2404.06563v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.04810">arXiv:2404.04810</a> <span> [<a href="https://arxiv.org/pdf/2404.04810">pdf</a>, <a href="https://arxiv.org/format/2404.04810">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Materials Science">cond-mat.mtrl-sci</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> AlphaCrystal-II: Distance matrix based crystal structure prediction using deep learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Song%2C+Y">Yuqi Song</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+R">Rongzhi Dong</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+L">Lai Wei</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Q">Qin Li</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+J">Jianjun Hu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.04810v1-abstract-short" style="display: inline;"> Computational prediction of stable crystal structures has a profound impact on the large-scale discovery of novel functional materials. However, predicting the crystal structure solely from a material's composition or formula is a promising yet challenging task, as traditional ab initio crystal structure prediction (CSP) methods rely on time-consuming global searches and first-principles free ener… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.04810v1-abstract-full').style.display = 'inline'; document.getElementById('2404.04810v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.04810v1-abstract-full" style="display: none;"> Computational prediction of stable crystal structures has a profound impact on the large-scale discovery of novel functional materials. However, predicting the crystal structure solely from a material's composition or formula is a promising yet challenging task, as traditional ab initio crystal structure prediction (CSP) methods rely on time-consuming global searches and first-principles free energy calculations. Inspired by the recent success of deep learning approaches in protein structure prediction, which utilize pairwise amino acid interactions to describe 3D structures, we present AlphaCrystal-II, a novel knowledge-based solution that exploits the abundant inter-atomic interaction patterns found in existing known crystal structures. AlphaCrystal-II predicts the atomic distance matrix of a target crystal material and employs this matrix to reconstruct its 3D crystal structure. 
arXiv:2403.19600 — https://arxiv.org/abs/2403.19600 (pdf: https://arxiv.org/pdf/2403.19600)
Categories: cs.CV (Computer Vision and Pattern Recognition)
Title: Enhance Image Classification via Inter-Class Image Mixup with Diffusion Model
Authors: Zhicai Wang, Longhui Wei, Tan Wang, Heyu Chen, Yanbin Hao, Xiang Wang, Xiangnan He, Qi Tian
Abstract: Text-to-image (T2I) generative models have recently emerged as a powerful tool, enabling the creation of photo-realistic images and giving rise to a multitude of applications. However, the effective integration of T2I models into fundamental image classification tasks remains an open question. A prevalent strategy to bolster image classification performance is to augment the training set with synthetic images generated by T2I models. In this study, we scrutinize the shortcomings of both current generative and conventional data augmentation techniques. Our analysis reveals that these methods struggle to produce images that are both faithful (in terms of foreground objects) and diverse (in terms of background contexts) for domain-specific concepts. To tackle this challenge, we introduce an innovative inter-class data augmentation method known as Diff-Mix (https://github.com/Zhicaiwww/Diff-Mix), which enriches the dataset by performing image translations between classes. Our empirical results demonstrate that Diff-Mix achieves a better balance between faithfulness and diversity, leading to a marked improvement in performance across diverse image classification scenarios, including few-shot, conventional, and long-tail classification on domain-specific datasets.
Submitted: 28 March, 2024; originally announced March 2024.
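A hedged sketch of the general inter-class translation pattern follows. The t2i_translate stand-in and the mixup-style label softening are assumptions for illustration; they are not necessarily Diff-Mix's exact editing or label-handling scheme.

    import numpy as np

    def t2i_translate(image, target_class, strength):
        # Stand-in for a diffusion-based image-to-image edit toward the
        # target class; a real model would return the edited image.
        return image

    def inter_class_sample(image, label_a, label_b, n_classes, strength=0.7):
        edited = t2i_translate(image, label_b, strength)
        target = np.zeros(n_classes)
        target[label_a] = 1.0 - strength   # residual source-class content
        target[label_b] = strength         # injected target-class content
        return edited, target

    img = np.zeros((224, 224, 3))
    x, y = inter_class_sample(img, label_a=3, label_b=7, n_classes=10)
    print(y)   # soft label spread across the two classes

Translating between classes (rather than generating each class from scratch) keeps the domain-specific foreground faithful while the donor image supplies background diversity, which is the balance the abstract highlights.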
arXiv:2403.13244 — https://arxiv.org/abs/2403.13244 (pdf: https://arxiv.org/pdf/2403.13244)
Categories: cs.CL (Computation and Language), cs.AI (Artificial Intelligence)
Title: Instruction Multi-Constraint Molecular Generation Using a Teacher-Student Large Language Model
Authors: Peng Zhou, Jianmin Wang, Chunyan Li, Zixu Wang, Yiping Liu, Siqi Sun, Jianxin Lin, Leyi Wei, Xibao Cai, Houtim Lai, Wei Liu, Longyue Wang, Yuansheng Liu, Xiangxiang Zeng
Abstract: While various models and computational tools have been proposed for structure and property analysis of molecules, generating molecules that conform to all desired structures and properties remains a challenge. Here, we introduce a multi-constraint molecular generation large language model, TSMMG, which, akin to a student, incorporates knowledge from various small models and tools, namely, the 'teachers'. To train TSMMG, we construct a large set of text-molecule pairs by extracting molecular knowledge from these 'teachers', enabling it to generate novel molecules that conform to the descriptions through various text prompts. We show experimentally that TSMMG performs remarkably well at generating molecules that meet complex, natural-language-described property requirements across two-, three-, and four-constraint tasks, with an average molecular validity of over 99% and success ratios of 82.58%, 68.03%, and 67.48%, respectively. The model also exhibits adaptability through zero-shot testing, creating molecules that satisfy combinations of properties it has not previously encountered. It can comprehend text inputs with various language styles, extending beyond the confines of the outlined prompts, as confirmed through empirical validation. Additionally, the knowledge distillation feature of TSMMG contributes to the continuous enhancement of small models, while the innovative approach to dataset construction effectively addresses issues of data scarcity and quality, positioning TSMMG as a promising tool in the domains of drug discovery and materials science.
Submitted: 10 October, 2024; v1 submitted 19 March, 2024; originally announced March 2024.
Comments: 37 pages, 10 figures
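The teacher-student data construction can be sketched as follows. Both "teacher" functions here are invented stand-ins for the property models and tools the paper distills from, and the SMILES heuristics are for demonstration only.

    # Conceptual sketch: each "teacher" describes one property of a molecule,
    # and the joined descriptions become the text side of a text-molecule
    # training pair for the student LLM.
    def teacher_logp(smiles):       # stand-in property tool
        return "high logP" if "c1ccccc1" in smiles else "low logP"

    def teacher_scaffold(smiles):   # stand-in structure tool
        return "contains a benzene ring" if "c1ccccc1" in smiles else "acyclic"

    def build_pair(smiles, teachers):
        text = "; ".join(t(smiles) for t in teachers)
        return {"prompt": f"Generate a molecule that is: {text}",
                "completion": smiles}

    pair = build_pair("CCOc1ccccc1", [teacher_logp, teacher_scaffold])
    print(pair["prompt"])

Stacking more teachers per molecule is what yields the two-, three-, and four-constraint training tasks the abstract evaluates.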
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">37 pages, 10 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.12767">arXiv:2403.12767</a> <span> [<a href="https://arxiv.org/pdf/2403.12767">pdf</a>, <a href="https://arxiv.org/format/2403.12767">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.eswa.2023.122093">10.1016/j.eswa.2023.122093 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Inter- and intra-uncertainty based feature aggregation model for semi-supervised histopathology image segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jin%2C+Q">Qiangguo Jin</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+H">Hui Cui</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+C">Changming Sun</a>, <a href="/search/cs?searchtype=author&query=Song%2C+Y">Yang Song</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+J">Jiangbin Zheng</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+L">Leilei Cao</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+L">Leyi Wei</a>, <a href="/search/cs?searchtype=author&query=Su%2C+R">Ran Su</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.12767v1-abstract-short" style="display: inline;"> Acquiring pixel-level annotations is often limited in applications such as histology studies that require domain expertise. Various semi-supervised learning approaches have been developed to work with limited ground truth annotations, such as the popular teacher-student models. However, hierarchical prediction uncertainty within the student model (intra-uncertainty) and image prediction uncertaint… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.12767v1-abstract-full').style.display = 'inline'; document.getElementById('2403.12767v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.12767v1-abstract-full" style="display: none;"> Acquiring pixel-level annotations is often limited in applications such as histology studies that require domain expertise. Various semi-supervised learning approaches have been developed to work with limited ground truth annotations, such as the popular teacher-student models. However, hierarchical prediction uncertainty within the student model (intra-uncertainty) and image prediction uncertainty (inter-uncertainty) have not been fully utilized by existing methods. To address these issues, we first propose a novel inter- and intra-uncertainty regularization method to measure and constrain both inter- and intra-inconsistencies in the teacher-student architecture. We also propose a new two-stage network with pseudo-mask guided feature aggregation (PG-FANet) as the segmentation model. 
arXiv:2403.09411 — https://arxiv.org/abs/2403.09411 (pdf: https://arxiv.org/pdf/2403.09411)
Categories: eess.SP (Signal Processing), cs.IT (Information Theory)
DOI: https://doi.org/10.1109/MWC.005.2300457
Title: Near-Field Channel Modeling for Holographic MIMO Communications
Authors: Tierui Gong, Li Wei, Chongwen Huang, George C. Alexandropoulos, Mérouane Debbah, Chau Yuen
Abstract: Empowered by the latest progress in innovative metamaterials/metasurfaces and advanced antenna technologies, holographic multiple-input multiple-output (H-MIMO) emerges as a promising technology for fulfilling the extreme goals of sixth-generation (6G) wireless networks. The antenna arrays utilized in H-MIMO comprise massive (possibly to an extreme extent) numbers of antenna elements, densely spaced at less than half a wavelength and integrated into a compact space, realizing an almost continuous aperture. Thanks to their low cost, size, weight, and power consumption, such apertures are expected to be widely fabricated for near-field communications. In addition, the physical features of H-MIMO enable manipulations directly in the electromagnetic (EM) wave domain and spatial multiplexing. To fully leverage this potential, near-field H-MIMO channel modeling, especially from the EM perspective, is of paramount significance. In this article, we overview near-field H-MIMO channel models, elaborating on the various modeling categories and their respective features, challenges, and evaluation criteria. We also present EM-domain channel models that address the inherent computational and measurement complexities. Finally, the article concludes with a set of future research directions on the topic.
Submitted: 16 March, 2024; v1 submitted 14 March, 2024; originally announced March 2024.
Comments: double column, 9 pages, 3 figures, 2 tables; accepted by IEEE Wireless Communications Magazine
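A quick worked example of why such apertures make near-field modeling unavoidable: the classical Fraunhofer distance 2*D^2/lambda, beyond which far-field (planar-wave) models are usually considered valid, grows with the square of the aperture size D. The numbers below are illustrative, not taken from the article.

    # Near-field extent vs. aperture size at a 30 GHz carrier.
    c = 3e8                    # speed of light, m/s
    f = 30e9                   # carrier frequency, Hz
    lam = c / f                # wavelength = 1 cm
    for D in (0.1, 0.5, 1.0):  # aperture size in meters
        print(f"D = {D} m -> near field extends to ~{2 * D**2 / lam:.0f} m")

A one-meter aperture at 30 GHz keeps users within roughly 200 m inside the near field, so spherical-wavefront (near-field) channel models become the norm rather than the exception for H-MIMO deployments.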
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">double column, 9 pages, 3 figures, 2 tables, accepted by IEEE Wireless Communications Magazine</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.09167">arXiv:2403.09167</a> <span> [<a href="https://arxiv.org/pdf/2403.09167">pdf</a>, <a href="https://arxiv.org/format/2403.09167">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Dial-insight: Fine-tuning Large Language Models with High-Quality Domain-Specific Data Preventing Capability Collapse </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sun%2C+J">Jianwei Sun</a>, <a href="/search/cs?searchtype=author&query=Mei%2C+C">Chaoyang Mei</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+L">Linlin Wei</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+K">Kaiyu Zheng</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+N">Na Liu</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+M">Ming Cui</a>, <a href="/search/cs?searchtype=author&query=Li%2C+T">Tianyi Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.09167v1-abstract-short" style="display: inline;"> The efficacy of large language models (LLMs) is heavily dependent on the quality of the underlying data, particularly within specialized domains. A common challenge when fine-tuning LLMs for domain-specific applications is the potential degradation of the model's generalization capabilities. To address these issues, we propose a two-stage approach for the construction of production prompts designe… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.09167v1-abstract-full').style.display = 'inline'; document.getElementById('2403.09167v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.09167v1-abstract-full" style="display: none;"> The efficacy of large language models (LLMs) is heavily dependent on the quality of the underlying data, particularly within specialized domains. A common challenge when fine-tuning LLMs for domain-specific applications is the potential degradation of the model's generalization capabilities. To address these issues, we propose a two-stage approach for the construction of production prompts designed to yield high-quality data. This method involves the generation of a diverse array of prompts that encompass a broad spectrum of tasks and exhibit a rich variety of expressions. Furthermore, we introduce a cost-effective, multi-dimensional quality assessment framework to ensure the integrity of the generated labeling data. Utilizing a dataset comprised of service provider and customer interactions from the real estate sector, we demonstrate a positive correlation between data quality and model performance. 
Notably, our findings indicate that the domain-specific proficiency of general LLMs can be enhanced through fine-tuning with data produced via our proposed method, without compromising their overall generalization abilities, even when exclusively domain-specific data is employed for fine-tuning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.09167v1-abstract-full').style.display = 'none'; document.getElementById('2403.09167v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Wei%2C+L&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Wei%2C+L&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Wei%2C+L&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Wei%2C+L&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Wei%2C+L&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Wei%2C+L&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li> <a href="/search/?searchtype=author&query=Wei%2C+L&start=250" class="pagination-link " aria-label="Page 6" aria-current="page">6 </a> </li> <li> <a href="/search/?searchtype=author&query=Wei%2C+L&start=300" class="pagination-link " aria-label="Page 7" aria-current="page">7 </a> </li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe 
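In the spirit of the multi-dimensional quality assessment, filtering generated fine-tuning pairs might look like the sketch below. The rubric is entirely invented (the paper does not publish these checks); it only illustrates scoring each sample on several dimensions and keeping it when all of them pass.

    # Invented multi-dimensional quality checks for generated samples.
    def length_ok(s):
        return 10 <= len(s["response"]) <= 2000

    def on_topic(s):
        # Domain check, echoing the paper's real-estate setting.
        return "apartment" in s["response"].lower()

    def no_pii(s):
        return "@" not in s["response"]

    CHECKS = [length_ok, on_topic, no_pii]

    def keep(sample):
        return all(check(sample) for check in CHECKS)

    sample = {"prompt": "Summarize the viewing",
              "response": "The client toured the apartment and asked about fees."}
    print(keep(sample))   # True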