CINXE.COM
Search | arXiv e-print repository
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 4,886 results for author: <span class="mathjax">Liu, J</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Liu%2C+J">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Liu, J"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Liu%2C+J&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Liu, J"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Liu%2C+J&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Liu%2C+J&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Liu%2C+J&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Liu%2C+J&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Liu%2C+J&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Liu%2C+J&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.14416">arXiv:2411.14416</a> <span> [<a href="https://arxiv.org/pdf/2411.14416">pdf</a>, <a href="https://arxiv.org/ps/2411.14416">ps</a>, <a href="https://arxiv.org/format/2411.14416">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Complexity">cs.CC</span> </div> </div> <p class="title is-5 mathjax"> QMA vs. QCMA and Pseudorandomness </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jiahui Liu</a>, <a href="/search/cs?searchtype=author&query=Mutreja%2C+S">Saachi Mutreja</a>, <a href="/search/cs?searchtype=author&query=Yuen%2C+H">Henry Yuen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.14416v1-abstract-short" style="display: inline;"> We study a longstanding question of Aaronson and Kuperberg on whether there exists a classical oracle separating $\mathsf{QMA}$ from $\mathsf{QCMA}$. Settling this question in either direction would yield insight into the power of quantum proofs over classical proofs. We show that such an oracle exists if a certain quantum pseudorandomness conjecture holds. Roughly speaking, the conjecture posits… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14416v1-abstract-full').style.display = 'inline'; document.getElementById('2411.14416v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.14416v1-abstract-full" style="display: none;"> We study a longstanding question of Aaronson and Kuperberg on whether there exists a classical oracle separating $\mathsf{QMA}$ from $\mathsf{QCMA}$. Settling this question in either direction would yield insight into the power of quantum proofs over classical proofs. We show that such an oracle exists if a certain quantum pseudorandomness conjecture holds. Roughly speaking, the conjecture posits that quantum algorithms cannot, by making few queries, distinguish between the uniform distribution over permutations versus permutations drawn from so-called "dense" distributions. Our result can be viewed as establishing a "win-win" scenario: \emph{either} there is a classical oracle separation of $\QMA$ from $\QCMA$, \emph{or} there is quantum advantage in distinguishing pseudorandom distributions on permutations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14416v1-abstract-full').style.display = 'none'; document.getElementById('2411.14416v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.14252">arXiv:2411.14252</a> <span> [<a href="https://arxiv.org/pdf/2411.14252">pdf</a>, <a href="https://arxiv.org/format/2411.14252">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Intent-Aware Dialogue Generation and Multi-Task Contrastive Learning for Multi-Turn Intent Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+J">Junhua Liu</a>, <a href="/search/cs?searchtype=author&query=Tan%2C+Y+K">Yong Keat Tan</a>, <a href="/search/cs?searchtype=author&query=Fu%2C+B">Bin Fu</a>, <a href="/search/cs?searchtype=author&query=Lim%2C+K+H">Kwan Hui Lim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.14252v1-abstract-short" style="display: inline;"> Generating large-scale, domain-specific, multilingual multi-turn dialogue datasets remains a significant hurdle for training effective Multi-Turn Intent Classification models in chatbot systems. In this paper, we introduce Chain-of-Intent, a novel mechanism that combines Hidden Markov Models with Large Language Models (LLMs) to generate contextually aware, intent-driven conversations through self-… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14252v1-abstract-full').style.display = 'inline'; document.getElementById('2411.14252v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.14252v1-abstract-full" style="display: none;"> Generating large-scale, domain-specific, multilingual multi-turn dialogue datasets remains a significant hurdle for training effective Multi-Turn Intent Classification models in chatbot systems. In this paper, we introduce Chain-of-Intent, a novel mechanism that combines Hidden Markov Models with Large Language Models (LLMs) to generate contextually aware, intent-driven conversations through self-play. By extracting domain-specific knowledge from e-commerce chat logs, we estimate conversation turns and intent transitions, which guide the generation of coherent dialogues. Leveraging LLMs to enhance emission probabilities, our approach produces natural and contextually consistent questions and answers. We also propose MINT-CL, a framework for multi-turn intent classification using multi-task contrastive learning, improving classification accuracy without the need for extensive annotated data. Evaluations show that our methods outperform baselines in dialogue quality and intent classification accuracy, especially in multilingual settings, while significantly reducing data generation efforts. Furthermore, we release MINT-E, a multilingual, intent-aware multi-turn e-commerce dialogue corpus to support future research in this area. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14252v1-abstract-full').style.display = 'none'; document.getElementById('2411.14252v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.14214">arXiv:2411.14214</a> <span> [<a href="https://arxiv.org/pdf/2411.14214">pdf</a>, <a href="https://arxiv.org/format/2411.14214">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Emerging Technologies">cs.ET</span> </div> </div> <p class="title is-5 mathjax"> Physics-Informed LLM-Agent for Automated Modulation Design in Power Electronics Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+J">Junhua Liu</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+F">Fanfan Lin</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xinze Li</a>, <a href="/search/cs?searchtype=author&query=Lim%2C+K+H">Kwan Hui Lim</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+S">Shuai Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.14214v1-abstract-short" style="display: inline;"> LLM-based autonomous agents have demonstrated outstanding performance in solving complex industrial tasks. However, in the pursuit of carbon neutrality and high-performance renewable energy systems, existing AI-assisted design automation faces significant limitations in explainability, scalability, and usability. To address these challenges, we propose LP-COMDA, an LLM-based, physics-informed auto… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14214v1-abstract-full').style.display = 'inline'; document.getElementById('2411.14214v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.14214v1-abstract-full" style="display: none;"> LLM-based autonomous agents have demonstrated outstanding performance in solving complex industrial tasks. However, in the pursuit of carbon neutrality and high-performance renewable energy systems, existing AI-assisted design automation faces significant limitations in explainability, scalability, and usability. To address these challenges, we propose LP-COMDA, an LLM-based, physics-informed autonomous agent that automates the modulation design of power converters in Power Electronics Systems with minimal human supervision. Unlike traditional AI-assisted approaches, LP-COMDA contains an LLM-based planner that gathers and validates design specifications through a user-friendly chat interface. The planner then coordinates with physics-informed design and optimization tools to iteratively generate and refine modulation designs autonomously. Through the chat interface, LP-COMDA provides an explainable design process, presenting explanations and charts. Experiments show that LP-COMDA outperforms all baseline methods, achieving a 63.2% reduction in error compared to the second-best benchmark method in terms of standard mean absolute error. Furthermore, empirical studies with 20 experts conclude that design time with LP-COMDA is over 33 times faster than conventional methods, showing its significant improvement on design efficiency over the current processes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14214v1-abstract-full').style.display = 'none'; document.getElementById('2411.14214v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.14179">arXiv:2411.14179</a> <span> [<a href="https://arxiv.org/pdf/2411.14179">pdf</a>, <a href="https://arxiv.org/format/2411.14179">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> CompetitorFormer: Competitor Transformer for 3D Instance Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+D">Duanchu Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jing Liu</a>, <a href="/search/cs?searchtype=author&query=Gong%2C+H">Haoran Gong</a>, <a href="/search/cs?searchtype=author&query=Quan%2C+Y">Yinghui Quan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+D">Di Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.14179v1-abstract-short" style="display: inline;"> Transformer-based methods have become the dominant approach for 3D instance segmentation. These methods predict instance masks via instance queries, ranking them by classification confidence and IoU scores to select the top prediction as the final outcome. However, it has been observed that the current models employ a fixed and higher number of queries than the instances present within a scene. In… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14179v1-abstract-full').style.display = 'inline'; document.getElementById('2411.14179v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.14179v1-abstract-full" style="display: none;"> Transformer-based methods have become the dominant approach for 3D instance segmentation. These methods predict instance masks via instance queries, ranking them by classification confidence and IoU scores to select the top prediction as the final outcome. However, it has been observed that the current models employ a fixed and higher number of queries than the instances present within a scene. In such instances, multiple queries predict the same instance, yet only a single query is ultimately optimized. The close scores of queries in the lower-level decoders make it challenging for the dominant query to distinguish itself rapidly, which ultimately impairs the model's accuracy and convergence efficiency. This phenomenon is referred to as inter-query competition. To address this challenge, we put forth a series of plug-and-play competition-oriented designs, collectively designated as the CompetitorFormer, with the aim of reducing competition and facilitating a dominant query. Experiments showed that integrating our designs with state-of-the-art frameworks consistently resulted in significant performance improvements in 3D instance segmentation across a range of datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14179v1-abstract-full').style.display = 'none'; document.getElementById('2411.14179v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.13797">arXiv:2411.13797</a> <span> [<a href="https://arxiv.org/pdf/2411.13797">pdf</a>, <a href="https://arxiv.org/format/2411.13797">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Hugging Rain Man: A Novel Facial Action Units Dataset for Analyzing Atypical Facial Expressions in Children with Autism Spectrum Disorder </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ji%2C+Y">Yanfeng Ji</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shutong Wang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+R">Ruyi Xu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jingying Chen</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+X">Xinzhou Jiang</a>, <a href="/search/cs?searchtype=author&query=Deng%2C+Z">Zhengyu Deng</a>, <a href="/search/cs?searchtype=author&query=Quan%2C+Y">Yuxuan Quan</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Junpeng Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.13797v1-abstract-short" style="display: inline;"> Children with Autism Spectrum Disorder (ASD) often exhibit atypical facial expressions. However, the specific objective facial features that underlie this subjective perception remain unclear. In this paper, we introduce a novel dataset, Hugging Rain Man (HRM), which includes facial action units (AUs) manually annotated by FACS experts for both children with ASD and typical development (TD). The d… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13797v1-abstract-full').style.display = 'inline'; document.getElementById('2411.13797v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.13797v1-abstract-full" style="display: none;"> Children with Autism Spectrum Disorder (ASD) often exhibit atypical facial expressions. However, the specific objective facial features that underlie this subjective perception remain unclear. In this paper, we introduce a novel dataset, Hugging Rain Man (HRM), which includes facial action units (AUs) manually annotated by FACS experts for both children with ASD and typical development (TD). The dataset comprises a rich collection of posed and spontaneous facial expressions, totaling approximately 130,000 frames, along with 22 AUs, 10 Action Descriptors (ADs), and atypicality ratings. A statistical analysis of static images from the HRM reveals significant differences between the ASD and TD groups across multiple AUs and ADs when displaying the same emotional expressions, confirming that participants with ASD tend to demonstrate more irregular and diverse expression patterns. Subsequently, a temporal regression method was presented to analyze atypicality of dynamic sequences, thereby bridging the gap between subjective perception and objective facial characteristics. Furthermore, baseline results for AU detection are provided for future research reference. This work not only contributes to our understanding of the unique facial expression characteristics associated with ASD but also provides potential tools for ASD early screening. Portions of the dataset, features, and pretrained models are accessible at: \url{https://github.com/Jonas-DL/Hugging-Rain-Man}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13797v1-abstract-full').style.display = 'none'; document.getElementById('2411.13797v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Portions of the dataset, features, and pretrained models are accessible at: https://github.com/Jonas-DL/Hugging-Rain-Man</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.13632">arXiv:2411.13632</a> <span> [<a href="https://arxiv.org/pdf/2411.13632">pdf</a>, <a href="https://arxiv.org/format/2411.13632">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> ID-Patch: Robust ID Association for Group Photo Personalization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yimeng Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhi%2C+T">Tiancheng Zhi</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jing Liu</a>, <a href="/search/cs?searchtype=author&query=Sang%2C+S">Shen Sang</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+L">Liming Jiang</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+Q">Qing Yan</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+S">Sijia Liu</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+L">Linjie Luo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.13632v1-abstract-short" style="display: inline;"> The ability to synthesize personalized group photos and specify the positions of each identity offers immense creative potential. While such imagery can be visually appealing, it presents significant challenges for existing technologies. A persistent issue is identity (ID) leakage, where injected facial features interfere with one another, resulting in low face resemblance, incorrect positioning,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13632v1-abstract-full').style.display = 'inline'; document.getElementById('2411.13632v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.13632v1-abstract-full" style="display: none;"> The ability to synthesize personalized group photos and specify the positions of each identity offers immense creative potential. While such imagery can be visually appealing, it presents significant challenges for existing technologies. A persistent issue is identity (ID) leakage, where injected facial features interfere with one another, resulting in low face resemblance, incorrect positioning, and visual artifacts. Existing methods suffer from limitations such as the reliance on segmentation models, increased runtime, or a high probability of ID leakage. To address these challenges, we propose ID-Patch, a novel method that provides robust association between identities and 2D positions. Our approach generates an ID patch and ID embeddings from the same facial features: the ID patch is positioned on the conditional image for precise spatial control, while the ID embeddings integrate with text embeddings to ensure high resemblance. Experimental results demonstrate that ID-Patch surpasses baseline methods across metrics, such as face ID resemblance, ID-position association accuracy, and generation efficiency. Project Page is: https://byteaigc.github.io/ID-Patch/ <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13632v1-abstract-full').style.display = 'none'; document.getElementById('2411.13632v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project Page is: https://byteaigc.github.io/ID-Patch/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.12747">arXiv:2411.12747</a> <span> [<a href="https://arxiv.org/pdf/2411.12747">pdf</a>, <a href="https://arxiv.org/format/2411.12747">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Trading and Market Microstructure">q-fin.TR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> A Survey of Financial AI: Architectures, Advances and Open Challenges </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+J">Junhua Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.12747v1-abstract-short" style="display: inline;"> Financial AI empowers sophisticated approaches to financial market forecasting, portfolio optimization, and automated trading. This survey provides a systematic analysis of these developments across three primary dimensions: predictive models that capture complex market dynamics, decision-making frameworks that optimize trading and investment strategies, and knowledge augmentation systems that lev… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12747v1-abstract-full').style.display = 'inline'; document.getElementById('2411.12747v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.12747v1-abstract-full" style="display: none;"> Financial AI empowers sophisticated approaches to financial market forecasting, portfolio optimization, and automated trading. This survey provides a systematic analysis of these developments across three primary dimensions: predictive models that capture complex market dynamics, decision-making frameworks that optimize trading and investment strategies, and knowledge augmentation systems that leverage unstructured financial information. We examine significant innovations including foundation models for financial time series, graph-based architectures for market relationship modeling, and hierarchical frameworks for portfolio optimization. Analysis reveals crucial trade-offs between model sophistication and practical constraints, particularly in high-frequency trading applications. We identify critical gaps and open challenges between theoretical advances and industrial implementation, outlining open challenges and opportunities for improving both model performance and practical applicability. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12747v1-abstract-full').style.display = 'none'; document.getElementById('2411.12747v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Full list of papers and summary slides are available at: https://github.com/junhua/awesome-finance-ai-papers</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.12530">arXiv:2411.12530</a> <span> [<a href="https://arxiv.org/pdf/2411.12530">pdf</a>, <a href="https://arxiv.org/format/2411.12530">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Contourlet Refinement Gate Framework for Thermal Spectrum Distribution Regularized Infrared Image Super-Resolution </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zou%2C+Y">Yang Zou</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zhixin Chen</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhipeng Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xingyuan Li</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+L">Long Ma</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jinyuan Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+P">Peng Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yanning Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.12530v1-abstract-short" style="display: inline;"> Image super-resolution (SR) is a classical yet still active low-level vision problem that aims to reconstruct high-resolution (HR) images from their low-resolution (LR) counterparts, serving as a key technique for image enhancement. Current approaches to address SR tasks, such as transformer-based and diffusion-based methods, are either dedicated to extracting RGB image features or assuming simila… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12530v1-abstract-full').style.display = 'inline'; document.getElementById('2411.12530v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.12530v1-abstract-full" style="display: none;"> Image super-resolution (SR) is a classical yet still active low-level vision problem that aims to reconstruct high-resolution (HR) images from their low-resolution (LR) counterparts, serving as a key technique for image enhancement. Current approaches to address SR tasks, such as transformer-based and diffusion-based methods, are either dedicated to extracting RGB image features or assuming similar degradation patterns, neglecting the inherent modal disparities between infrared and visible images. When directly applied to infrared image SR tasks, these methods inevitably distort the infrared spectral distribution, compromising the machine perception in downstream tasks. In this work, we emphasize the infrared spectral distribution fidelity and propose a Contourlet refinement gate framework to restore infrared modal-specific features while preserving spectral distribution fidelity. Our approach captures high-pass subbands from multi-scale and multi-directional infrared spectral decomposition to recover infrared-degraded information through a gate architecture. The proposed Spectral Fidelity Loss regularizes the spectral frequency distribution during reconstruction, which ensures the preservation of both high- and low-frequency components and maintains the fidelity of infrared-specific features. We propose a two-stage prompt-learning optimization to guide the model in learning infrared HR characteristics from LR degradation. Extensive experiments demonstrate that our approach outperforms existing image SR models in both visual and perceptual tasks while notably enhancing machine perception in downstream tasks. Our code is available at https://github.com/hey-it-s-me/CoRPLE. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12530v1-abstract-full').style.display = 'none'; document.getElementById('2411.12530v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 figures, 6 tables</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 68T45 <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.4.3 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.12448">arXiv:2411.12448</a> <span> [<a href="https://arxiv.org/pdf/2411.12448">pdf</a>, <a href="https://arxiv.org/format/2411.12448">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Large Language Models for Lossless Image Compression: Next-Pixel Prediction in Language Space is All You Need </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kecheng Chen</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+P">Pingping Zhang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+H">Hui Liu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jie Liu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yibing Liu</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+J">Jixin Huang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shiqi Wang</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+H">Hong Yan</a>, <a href="/search/cs?searchtype=author&query=Li%2C+H">Haoliang Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.12448v1-abstract-short" style="display: inline;"> We have recently witnessed that ``Intelligence" and `` Compression" are the two sides of the same coin, where the language large model (LLM) with unprecedented intelligence is a general-purpose lossless compressor for various data modalities. This attribute particularly appeals to the lossless image compression community, given the increasing need to compress high-resolution images in the current… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12448v1-abstract-full').style.display = 'inline'; document.getElementById('2411.12448v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.12448v1-abstract-full" style="display: none;"> We have recently witnessed that ``Intelligence" and `` Compression" are the two sides of the same coin, where the language large model (LLM) with unprecedented intelligence is a general-purpose lossless compressor for various data modalities. This attribute particularly appeals to the lossless image compression community, given the increasing need to compress high-resolution images in the current streaming media era. Consequently, a spontaneous envision emerges: Can the compression performance of the LLM elevate lossless image compression to new heights? However, our findings indicate that the naive application of LLM-based lossless image compressors suffers from a considerable performance gap compared with existing state-of-the-art (SOTA) codecs on common benchmark datasets. In light of this, we are dedicated to fulfilling the unprecedented intelligence (compression) capacity of the LLM for lossless image compression tasks, thereby bridging the gap between theoretical and practical compression performance. Specifically, we propose P$^{2}$-LLM, a next-pixel prediction-based LLM, which integrates various elaborated insights and methodologies, \textit{e.g.,} pixel-level priors, the in-context ability of LLM, and a pixel-level semantic preservation strategy, to enhance the understanding capacity of pixel sequences for better next-pixel predictions. Extensive experiments on benchmark datasets demonstrate that P$^{2}$-LLM can beat SOTA classical and learned codecs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12448v1-abstract-full').style.display = 'none'; document.getElementById('2411.12448v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.12329">arXiv:2411.12329</a> <span> [<a href="https://arxiv.org/pdf/2411.12329">pdf</a>, <a href="https://arxiv.org/format/2411.12329">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> </div> </div> <p class="title is-5 mathjax"> Attributed Graph Clustering in Collaborative Settings </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Rui Zhang</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+X">Xiaoyang Hou</a>, <a href="/search/cs?searchtype=author&query=Tian%2C+Z">Zhihua Tian</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jian Liu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Q">Qingbiao Wu</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+K">Kui Ren</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.12329v1-abstract-short" style="display: inline;"> Graph clustering is an unsupervised machine learning method that partitions the nodes in a graph into different groups. Despite achieving significant progress in exploiting both attributed and structured data information, graph clustering methods often face practical challenges related to data isolation. Moreover, the absence of collaborative methods for graph clustering limits their effectiveness… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12329v1-abstract-full').style.display = 'inline'; document.getElementById('2411.12329v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.12329v1-abstract-full" style="display: none;"> Graph clustering is an unsupervised machine learning method that partitions the nodes in a graph into different groups. Despite achieving significant progress in exploiting both attributed and structured data information, graph clustering methods often face practical challenges related to data isolation. Moreover, the absence of collaborative methods for graph clustering limits their effectiveness. In this paper, we propose a collaborative graph clustering framework for attributed graphs, supporting attributed graph clustering over vertically partitioned data with different participants holding distinct features of the same data. Our method leverages a novel technique that reduces the sample space, improving the efficiency of the attributed graph clustering method. Furthermore, we compare our method to its centralized counterpart under a proximity condition, demonstrating that the successful local results of each participant contribute to the overall success of the collaboration. We fully implement our approach and evaluate its utility and efficiency by conducting experiments on four public datasets. The results demonstrate that our method achieves comparable accuracy levels to centralized attributed graph clustering methods. Our collaborative graph clustering framework provides an efficient and effective solution for graph clustering challenges related to data isolation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12329v1-abstract-full').style.display = 'none'; document.getElementById('2411.12329v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, 3 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.12307">arXiv:2411.12307</a> <span> [<a href="https://arxiv.org/pdf/2411.12307">pdf</a>, <a href="https://arxiv.org/format/2411.12307">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Balancing Accuracy and Efficiency in Multi-Turn Intent Classification for LLM-Powered Dialog Systems in Production </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+J">Junhua Liu</a>, <a href="/search/cs?searchtype=author&query=Tan%2C+Y+K">Yong Keat Tan</a>, <a href="/search/cs?searchtype=author&query=Fu%2C+B">Bin Fu</a>, <a href="/search/cs?searchtype=author&query=Lim%2C+K+H">Kwan Hui Lim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.12307v1-abstract-short" style="display: inline;"> Accurate multi-turn intent classification is essential for advancing conversational AI systems. However, challenges such as the scarcity of comprehensive datasets and the complexity of contextual dependencies across dialogue turns hinder progress. This paper presents two novel approaches leveraging Large Language Models (LLMs) to enhance scalability and reduce latency in production dialogue system… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12307v1-abstract-full').style.display = 'inline'; document.getElementById('2411.12307v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.12307v1-abstract-full" style="display: none;"> Accurate multi-turn intent classification is essential for advancing conversational AI systems. However, challenges such as the scarcity of comprehensive datasets and the complexity of contextual dependencies across dialogue turns hinder progress. This paper presents two novel approaches leveraging Large Language Models (LLMs) to enhance scalability and reduce latency in production dialogue systems. First, we introduce Symbol Tuning, which simplifies intent labels to reduce task complexity and improve performance in multi-turn dialogues. Second, we propose C-LARA (Consistency-aware, Linguistics Adaptive Retrieval Augmentation), a framework that employs LLMs for data augmentation and pseudo-labeling to generate synthetic multi-turn dialogues. These enriched datasets are used to fine-tune a small, efficient model suitable for deployment. Experiments conducted on multilingual dialogue datasets demonstrate significant improvements in classification accuracy and resource efficiency. Our methods enhance multi-turn intent classification accuracy by 5.09%, reduce annotation costs by 40%, and enable scalable deployment in low-resource multilingual industrial systems, highlighting their practicality and impact. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12307v1-abstract-full').style.display = 'none'; document.getElementById('2411.12307v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.11929">arXiv:2411.11929</a> <span> [<a href="https://arxiv.org/pdf/2411.11929">pdf</a>, <a href="https://arxiv.org/format/2411.11929">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> ChatHTTPFuzz: Large Language Model-Assisted IoT HTTP Fuzzing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yang%2C+Z">Zhe Yang</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+H">Hao Peng</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Y">Yanling Jiang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xingwei Li</a>, <a href="/search/cs?searchtype=author&query=Du%2C+H">Haohua Du</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shuhai Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jianwei Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.11929v1-abstract-short" style="display: inline;"> Internet of Things (IoT) devices offer convenience through web interfaces, web VPNs, and other web-based services, all relying on the HTTP protocol. However, these externally exposed HTTP services resent significant security risks. Although fuzzing has shown some effectiveness in identifying vulnerabilities in IoT HTTP services, most state-of-the-art tools still rely on random mutation trategies,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11929v1-abstract-full').style.display = 'inline'; document.getElementById('2411.11929v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.11929v1-abstract-full" style="display: none;"> Internet of Things (IoT) devices offer convenience through web interfaces, web VPNs, and other web-based services, all relying on the HTTP protocol. However, these externally exposed HTTP services resent significant security risks. Although fuzzing has shown some effectiveness in identifying vulnerabilities in IoT HTTP services, most state-of-the-art tools still rely on random mutation trategies, leading to difficulties in accurately understanding the HTTP protocol's structure and generating many invalid test cases. Furthermore, These fuzzers rely on a limited set of initial seeds for testing. While this approach initiates testing, the limited number and diversity of seeds hinder comprehensive coverage of complex scenarios in IoT HTTP services. In this paper, we investigate and find that large language models (LLMs) excel in parsing HTTP protocol data and analyzing code logic. Based on these findings, we propose a novel LLM-guided IoT HTTP fuzzing method, ChatHTTPFuzz, which automatically parses protocol fields and analyzes service code logic to generate protocol-compliant test cases. Specifically, we use LLMs to label fields in HTTP protocol data, creating seed templates. Second, The LLM analyzes service code to guide the generation of additional packets aligned with the code logic, enriching the seed templates and their field values. Finally, we design an enhanced Thompson sampling algorithm based on the exploration balance factor and mutation potential factor to schedule seed templates. We evaluate ChatHTTPFuzz on 14 different real-world IoT devices. It finds more vulnerabilities than SNIPUZZ, BOOFUZZ, and MUTINY. ChatHTTPFuzz has discovered 103 vulnerabilities, of which 68 are unique, and 23 have been assigned CVEs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11929v1-abstract-full').style.display = 'none'; document.getElementById('2411.11929v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.11530">arXiv:2411.11530</a> <span> [<a href="https://arxiv.org/pdf/2411.11530">pdf</a>, <a href="https://arxiv.org/format/2411.11530">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> SeqProFT: Applying LoRA Finetuning for Sequence-only Protein Property Predictions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Shuo Zhang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J+K">Jian K. Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.11530v1-abstract-short" style="display: inline;"> Protein language models (PLMs) are capable of learning the relationships between protein sequences and functions by treating amino acid sequences as textual data in a self-supervised manner. However, fine-tuning these models typically demands substantial computational resources and time, with results that may not always be optimized for specific tasks. To overcome these challenges, this study empl… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11530v1-abstract-full').style.display = 'inline'; document.getElementById('2411.11530v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.11530v1-abstract-full" style="display: none;"> Protein language models (PLMs) are capable of learning the relationships between protein sequences and functions by treating amino acid sequences as textual data in a self-supervised manner. However, fine-tuning these models typically demands substantial computational resources and time, with results that may not always be optimized for specific tasks. To overcome these challenges, this study employs the LoRA method to perform end-to-end fine-tuning of the ESM-2 model specifically for protein property prediction tasks, utilizing only sequence information. Additionally, a multi-head attention mechanism is integrated into the downstream network to combine sequence features with contact map information, thereby enhancing the model's comprehension of protein sequences. Experimental results of extensive classification and regression tasks demonstrate that the fine-tuned model achieves strong performance and faster convergence across multiple regression and classification tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11530v1-abstract-full').style.display = 'none'; document.getElementById('2411.11530v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.11478">arXiv:2411.11478</a> <span> [<a href="https://arxiv.org/pdf/2411.11478">pdf</a>, <a href="https://arxiv.org/format/2411.11478">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> SoK: On the Role and Future of AIGC Watermarking in the Era of Gen-AI </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ren%2C+K">Kui Ren</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Z">Ziqi Yang</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+L">Li Lu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jian Liu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yiming Li</a>, <a href="/search/cs?searchtype=author&query=Wan%2C+J">Jie Wan</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+X">Xiaodi Zhao</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+X">Xianheng Feng</a>, <a href="/search/cs?searchtype=author&query=Shao%2C+S">Shuo Shao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.11478v2-abstract-short" style="display: inline;"> The rapid advancement of AI technology, particularly in generating AI-generated content (AIGC), has transformed numerous fields, e.g., art video generation, but also brings new risks, including the misuse of AI for misinformation and intellectual property theft. To address these concerns, AIGC watermarks offer an effective solution to mitigate malicious activities. However, existing watermarking s… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11478v2-abstract-full').style.display = 'inline'; document.getElementById('2411.11478v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.11478v2-abstract-full" style="display: none;"> The rapid advancement of AI technology, particularly in generating AI-generated content (AIGC), has transformed numerous fields, e.g., art video generation, but also brings new risks, including the misuse of AI for misinformation and intellectual property theft. To address these concerns, AIGC watermarks offer an effective solution to mitigate malicious activities. However, existing watermarking surveys focus more on traditional watermarks, overlooking AIGC-specific challenges. In this work, we propose a systematic investigation into AIGC watermarking and provide the first formal definition of AIGC watermarking. Different from previous surveys, we provide a taxonomy based on the core properties of the watermark which are summarized through comprehensive literature from various AIGC modalities. Derived from the properties, we discuss the functionality and security threats of AIGC watermarking. In the end, we thoroughly investigate the AIGC governance of different countries and practitioners. We believe this taxonomy better aligns with the practical demands for watermarking in the era of GenAI, thus providing a clearer summary of existing work and uncovering potential future research directions for the community. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11478v2-abstract-full').style.display = 'none'; document.getElementById('2411.11478v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.11361">arXiv:2411.11361</a> <span> [<a href="https://arxiv.org/pdf/2411.11361">pdf</a>, <a href="https://arxiv.org/format/2411.11361">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Scalable Autoregressive Monocular Depth Estimation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jinhong Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jian Liu</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+D">Dongqi Tang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+W">Weiqiang Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+W">Wentong Li</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+D">Danny Chen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J+i">J intai Chen</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+J">Jian Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.11361v1-abstract-short" style="display: inline;"> This paper proposes a new autoregressive model as an effective and scalable monocular depth estimator. Our idea is simple: We tackle the monocular depth estimation (MDE) task with an autoregressive prediction paradigm, based on two core designs. First, our depth autoregressive model (DAR) treats the depth map of different resolutions as a set of tokens, and conducts the low-to-high resolution auto… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11361v1-abstract-full').style.display = 'inline'; document.getElementById('2411.11361v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.11361v1-abstract-full" style="display: none;"> This paper proposes a new autoregressive model as an effective and scalable monocular depth estimator. Our idea is simple: We tackle the monocular depth estimation (MDE) task with an autoregressive prediction paradigm, based on two core designs. First, our depth autoregressive model (DAR) treats the depth map of different resolutions as a set of tokens, and conducts the low-to-high resolution autoregressive objective with a patch-wise casual mask. Second, our DAR recursively discretizes the entire depth range into more compact intervals, and attains the coarse-to-fine granularity autoregressive objective in an ordinal-regression manner. By coupling these two autoregressive objectives, our DAR establishes new state-of-the-art (SOTA) on KITTI and NYU Depth v2 by clear margins. Further, our scalable approach allows us to scale the model up to 2.0B and achieve the best RMSE of 1.799 on the KITTI dataset (5% improvement) compared to 1.896 by the current SOTA (Depth Anything). DAR further showcases zero-shot generalization ability on unseen datasets. These results suggest that DAR yields superior performance with an autoregressive prediction paradigm, providing a promising approach to equip modern autoregressive large models (e.g., GPT-4o) with depth estimation capabilities. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11361v1-abstract-full').style.display = 'none'; document.getElementById('2411.11361v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10937">arXiv:2411.10937</a> <span> [<a href="https://arxiv.org/pdf/2411.10937">pdf</a>, <a href="https://arxiv.org/format/2411.10937">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Memory-Augmented Multimodal LLMs for Surgical VQA via Self-Contained Inquiry </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hou%2C+W">Wenjun Hou</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+Y">Yi Cheng</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+K">Kaishuai Xu</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+Y">Yan Hu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+W">Wenjie Li</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jiang Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.10937v1-abstract-short" style="display: inline;"> Comprehensively understanding surgical scenes in Surgical Visual Question Answering (Surgical VQA) requires reasoning over multiple objects. Previous approaches address this task using cross-modal fusion strategies to enhance reasoning ability. However, these methods often struggle with limited scene understanding and question comprehension, and some rely on external resources (e.g., pre-extracted… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10937v1-abstract-full').style.display = 'inline'; document.getElementById('2411.10937v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.10937v1-abstract-full" style="display: none;"> Comprehensively understanding surgical scenes in Surgical Visual Question Answering (Surgical VQA) requires reasoning over multiple objects. Previous approaches address this task using cross-modal fusion strategies to enhance reasoning ability. However, these methods often struggle with limited scene understanding and question comprehension, and some rely on external resources (e.g., pre-extracted object features), which can introduce errors and generalize poorly across diverse surgical environments. To address these challenges, we propose SCAN, a simple yet effective memory-augmented framework that leverages Multimodal LLMs to improve surgical context comprehension via Self-Contained Inquiry. SCAN operates autonomously, generating two types of memory for context augmentation: Direct Memory (DM), which provides multiple candidates (or hints) to the final answer, and Indirect Memory (IM), which consists of self-contained question-hint pairs to capture broader scene context. DM directly assists in answering the question, while IM enhances understanding of the surgical scene beyond the immediate query. Reasoning over these object-aware memories enables the model to accurately interpret images and respond to questions. Extensive experiments on three publicly available Surgical VQA datasets demonstrate that SCAN achieves state-of-the-art performance, offering improved accuracy and robustness across various surgical scenarios. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10937v1-abstract-full').style.display = 'none'; document.getElementById('2411.10937v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10911">arXiv:2411.10911</a> <span> [<a href="https://arxiv.org/pdf/2411.10911">pdf</a>, <a href="https://arxiv.org/format/2411.10911">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Materials Science">cond-mat.mtrl-sci</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Chemical Physics">physics.chem-ph</span> </div> </div> <p class="title is-5 mathjax"> Constructing accurate machine-learned potentials and performing highly efficient atomistic simulations to predict structural and thermal properties </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+J">Junlan Liu</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+Q">Qian Yin</a>, <a href="/search/cs?searchtype=author&query=He%2C+M">Mengshu He</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+J">Jun Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.10911v1-abstract-short" style="display: inline;"> The $\text{Cu}_7\text{P}\text{S}_6$ compound has garnered significant attention due to its potential in thermoelectric applications. In this study, we introduce a neuroevolution potential (NEP), trained on a dataset generated from ab initio molecular dynamics (AIMD) simulations, using the moment tensor potential (MTP) as a reference. The low root mean square errors (RMSEs) for total energy and ato… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10911v1-abstract-full').style.display = 'inline'; document.getElementById('2411.10911v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.10911v1-abstract-full" style="display: none;"> The $\text{Cu}_7\text{P}\text{S}_6$ compound has garnered significant attention due to its potential in thermoelectric applications. In this study, we introduce a neuroevolution potential (NEP), trained on a dataset generated from ab initio molecular dynamics (AIMD) simulations, using the moment tensor potential (MTP) as a reference. The low root mean square errors (RMSEs) for total energy and atomic forces demonstrate the high accuracy and transferability of both the MTP and NEP. We further calculate the phonon density of states (DOS) and radial distribution function (RDF) using both machine learning potentials, comparing the results to density functional theory (DFT) calculations. While the MTP potential offers slightly higher accuracy, the NEP achieves a remarkable 41-fold increase in computational speed. These findings provide detailed microscopic insights into the dynamics and rapid Cu-ion diffusion, paving the way for future studies on Cu-based solid electrolytes and their applications in energy devices. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10911v1-abstract-full').style.display = 'none'; document.getElementById('2411.10911v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10697">arXiv:2411.10697</a> <span> [<a href="https://arxiv.org/pdf/2411.10697">pdf</a>, <a href="https://arxiv.org/format/2411.10697">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> </div> </div> <p class="title is-5 mathjax"> Language Model Evolutionary Algorithms for Recommender Systems: Benchmarks and Algorithm Comparisons </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jiao Liu</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Z">Zhu Sun</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+S">Shanshan Feng</a>, <a href="/search/cs?searchtype=author&query=Ong%2C+Y">Yew-Soon Ong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.10697v1-abstract-short" style="display: inline;"> In the evolutionary computing community, the remarkable language-handling capabilities and reasoning power of large language models (LLMs) have significantly enhanced the functionality of evolutionary algorithms (EAs), enabling them to tackle optimization problems involving structured language or program code. Although this field is still in its early stages, its impressive potential has led to th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10697v1-abstract-full').style.display = 'inline'; document.getElementById('2411.10697v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.10697v1-abstract-full" style="display: none;"> In the evolutionary computing community, the remarkable language-handling capabilities and reasoning power of large language models (LLMs) have significantly enhanced the functionality of evolutionary algorithms (EAs), enabling them to tackle optimization problems involving structured language or program code. Although this field is still in its early stages, its impressive potential has led to the development of various LLM-based EAs. To effectively evaluate the performance and practical applicability of these LLM-based EAs, benchmarks with real-world relevance are essential. In this paper, we focus on LLM-based recommender systems (RSs) and introduce a benchmark problem set, named RSBench, specifically designed to assess the performance of LLM-based EAs in recommendation prompt optimization. RSBench emphasizes session-based recommendations, aiming to discover a set of Pareto optimal prompts that guide the recommendation process, providing accurate, diverse, and fair recommendations. We develop three LLM-based EAs based on established EA frameworks and experimentally evaluate their performance using RSBench. Our study offers valuable insights into the application of EAs in LLM-based RSs. Additionally, we explore key components that may influence the overall performance of the RS, providing meaningful guidance for future research on the development of LLM-based EAs in RSs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10697v1-abstract-full').style.display = 'none'; document.getElementById('2411.10697v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10510">arXiv:2411.10510</a> <span> [<a href="https://arxiv.org/pdf/2411.10510">pdf</a>, <a href="https://arxiv.org/format/2411.10510">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> SmoothCache: A Universal Inference Acceleration Technique for Diffusion Transformers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+J">Joseph Liu</a>, <a href="/search/cs?searchtype=author&query=Geddes%2C+J">Joshua Geddes</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+Z">Ziyu Guo</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+H">Haomiao Jiang</a>, <a href="/search/cs?searchtype=author&query=Nandwana%2C+M+K">Mahesh Kumar Nandwana</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.10510v1-abstract-short" style="display: inline;"> Diffusion Transformers (DiT) have emerged as powerful generative models for various tasks, including image, video, and speech synthesis. However, their inference process remains computationally expensive due to the repeated evaluation of resource-intensive attention and feed-forward modules. To address this, we introduce SmoothCache, a model-agnostic inference acceleration technique for DiT archit… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10510v1-abstract-full').style.display = 'inline'; document.getElementById('2411.10510v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.10510v1-abstract-full" style="display: none;"> Diffusion Transformers (DiT) have emerged as powerful generative models for various tasks, including image, video, and speech synthesis. However, their inference process remains computationally expensive due to the repeated evaluation of resource-intensive attention and feed-forward modules. To address this, we introduce SmoothCache, a model-agnostic inference acceleration technique for DiT architectures. SmoothCache leverages the observed high similarity between layer outputs across adjacent diffusion timesteps. By analyzing layer-wise representation errors from a small calibration set, SmoothCache adaptively caches and reuses key features during inference. Our experiments demonstrate that SmoothCache achieves 8% to 71% speed up while maintaining or even improving generation quality across diverse modalities. We showcase its effectiveness on DiT-XL for image generation, Open-Sora for text-to-video, and Stable Audio Open for text-to-audio, highlighting its potential to enable real-time applications and broaden the accessibility of powerful DiT models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10510v1-abstract-full').style.display = 'none'; document.getElementById('2411.10510v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Code can be found at https://github.com/Roblox/SmoothCache</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09823">arXiv:2411.09823</a> <span> [<a href="https://arxiv.org/pdf/2411.09823">pdf</a>, <a href="https://arxiv.org/format/2411.09823">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Architect: Generating Vivid and Interactive 3D Scenes with Hierarchical 2D Inpainting </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yian Wang</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+X">Xiaowen Qiu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jiageng Liu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zhehuan Chen</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+J">Jiting Cai</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yufei Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tsun-Hsuan Wang</a>, <a href="/search/cs?searchtype=author&query=Xian%2C+Z">Zhou Xian</a>, <a href="/search/cs?searchtype=author&query=Gan%2C+C">Chuang Gan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.09823v1-abstract-short" style="display: inline;"> Creating large-scale interactive 3D environments is essential for the development of Robotics and Embodied AI research. Current methods, including manual design, procedural generation, diffusion-based scene generation, and large language model (LLM) guided scene design, are hindered by limitations such as excessive human effort, reliance on predefined rules or training datasets, and limited 3D spa… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09823v1-abstract-full').style.display = 'inline'; document.getElementById('2411.09823v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.09823v1-abstract-full" style="display: none;"> Creating large-scale interactive 3D environments is essential for the development of Robotics and Embodied AI research. Current methods, including manual design, procedural generation, diffusion-based scene generation, and large language model (LLM) guided scene design, are hindered by limitations such as excessive human effort, reliance on predefined rules or training datasets, and limited 3D spatial reasoning ability. Since pre-trained 2D image generative models better capture scene and object configuration than LLMs, we address these challenges by introducing Architect, a generative framework that creates complex and realistic 3D embodied environments leveraging diffusion-based 2D image inpainting. In detail, we utilize foundation visual perception models to obtain each generated object from the image and leverage pre-trained depth estimation models to lift the generated 2D image to 3D space. Our pipeline is further extended to a hierarchical and iterative inpainting process to continuously generate placement of large furniture and small objects to enrich the scene. This iterative structure brings the flexibility for our method to generate or refine scenes from various starting points, such as text, floor plans, or pre-arranged environments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09823v1-abstract-full').style.display = 'none'; document.getElementById('2411.09823v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09712">arXiv:2411.09712</a> <span> [<a href="https://arxiv.org/pdf/2411.09712">pdf</a>, <a href="https://arxiv.org/format/2411.09712">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> </div> </div> <p class="title is-5 mathjax"> Space-Air-Ground Integrated MEC-Assisted Industrial Cyber-Physical Systems: An Online Decentralized Optimization Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=He%2C+L">Long He</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+G">Geng Sun</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Z">Zemin Sun</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jiacheng Wang</a>, <a href="/search/cs?searchtype=author&query=Du%2C+H">Hongyang Du</a>, <a href="/search/cs?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jiangchuan Liu</a>, <a href="/search/cs?searchtype=author&query=Leung%2C+V+C+M">Victor C. M. Leung</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.09712v1-abstract-short" style="display: inline;"> Cloud computing and edge/fog computing are playing a pivotal role in driving the transformation of industrial cyber-physical systems (ICPS) towards greater intelligence and automation by providing high-quality computation offloading services to Internet of Things devices (IoTDs). Recently, space-air-ground integrated multi-access edge computing (SAGIMEC) is emerging as a promising architecture com… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09712v1-abstract-full').style.display = 'inline'; document.getElementById('2411.09712v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.09712v1-abstract-full" style="display: none;"> Cloud computing and edge/fog computing are playing a pivotal role in driving the transformation of industrial cyber-physical systems (ICPS) towards greater intelligence and automation by providing high-quality computation offloading services to Internet of Things devices (IoTDs). Recently, space-air-ground integrated multi-access edge computing (SAGIMEC) is emerging as a promising architecture combining edge computing and cloud computing, which has the potential to be integrated with ICPS to accelerate the realization of the above vision. In this work, we first present an SAGIMEC-assisted ICPS architecture that incorporates edge computing and cloud computing through seamless connectivity supported by satellite networks to achieve determinism in connectivity, networked computing, and intelligent networked control. Then, we formulate a joint satellite selection, computation offloading, communication resource allocation, computation resource allocation, and UAV trajectory control optimization problem (JSC4OP) to maximize the quality of service (QoS) of IoTDs. This problem considers both the dynamics and uncertainties of the system environment, as well as the limited resources and energy of UAVs. Given the complexity of JSC4OP, we propose an online decentralized optimization approach (ODOA) to solve the problem. Specifically, JSC4OP is first transformed into a real-time decision-making optimization problem (RDOP) by leveraging Lyapunov optimization. Then, to solve the RDOP, we introduce an online learning-based latency prediction method to predict the uncertain system environment and a game theoretic decision-making method to make real-time decisions. Finally, theoretical analysis confirms the effectiveness of the ODOA, while the simulation results demonstrate that the proposed ODOA outperforms other alternative approaches in terms of overall system performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09712v1-abstract-full').style.display = 'none'; document.getElementById('2411.09712v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: text overlap with arXiv:2406.11918</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09585">arXiv:2411.09585</a> <span> [<a href="https://arxiv.org/pdf/2411.09585">pdf</a>, <a href="https://arxiv.org/format/2411.09585">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Backdoor Mitigation by Distance-Driven Detoxification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wei%2C+S">Shaokui Wei</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jiayin Liu</a>, <a href="/search/cs?searchtype=author&query=Zha%2C+H">Hongyuan Zha</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.09585v1-abstract-short" style="display: inline;"> Backdoor attacks undermine the integrity of machine learning models by allowing attackers to manipulate predictions using poisoned training data. Such attacks lead to targeted misclassification when specific triggers are present, while the model behaves normally under other conditions. This paper considers a post-training backdoor defense task, aiming to detoxify the backdoors in pre-trained model… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09585v1-abstract-full').style.display = 'inline'; document.getElementById('2411.09585v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.09585v1-abstract-full" style="display: none;"> Backdoor attacks undermine the integrity of machine learning models by allowing attackers to manipulate predictions using poisoned training data. Such attacks lead to targeted misclassification when specific triggers are present, while the model behaves normally under other conditions. This paper considers a post-training backdoor defense task, aiming to detoxify the backdoors in pre-trained models. We begin by analyzing the underlying issues of vanilla fine-tuning and observe that it is often trapped in regions with low loss for both clean and poisoned samples. Motivated by such observations, we propose Distance-Driven Detoxification (D3), an innovative approach that reformulates backdoor defense as a constrained optimization problem. Specifically, D3 promotes the model's departure from the vicinity of its initial weights, effectively reducing the influence of backdoors. Extensive experiments on state-of-the-art (SOTA) backdoor attacks across various model architectures and datasets demonstrate that D3 not only matches but often surpasses the performance of existing SOTA post-training defense techniques. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09585v1-abstract-full').style.display = 'none'; document.getElementById('2411.09585v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint version</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09439">arXiv:2411.09439</a> <span> [<a href="https://arxiv.org/pdf/2411.09439">pdf</a>, <a href="https://arxiv.org/format/2411.09439">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Spider: Any-to-Many Multimodal LLM </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lai%2C+J">Jinxiang Lai</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jie Zhang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jun Liu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jian Li</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+X">Xiaocheng Lu</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+S">Song Guo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.09439v1-abstract-short" style="display: inline;"> Multimodal LLMs (MLLMs) have emerged as an extension of Large Language Models (LLMs), enabling the integration of various modalities. However, Any-to-Any MLLMs are limited to generating pairwise modalities 'Text + X' within a single response, such as Text + {Image or Audio or Video}. To address this limitation, we introduce Spider, a novel efficient Any-to-Many Modalities Generation (AMMG) framewo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09439v1-abstract-full').style.display = 'inline'; document.getElementById('2411.09439v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.09439v1-abstract-full" style="display: none;"> Multimodal LLMs (MLLMs) have emerged as an extension of Large Language Models (LLMs), enabling the integration of various modalities. However, Any-to-Any MLLMs are limited to generating pairwise modalities 'Text + X' within a single response, such as Text + {Image or Audio or Video}. To address this limitation, we introduce Spider, a novel efficient Any-to-Many Modalities Generation (AMMG) framework, which can generate an arbitrary combination of modalities 'Text + Xs', such as Text + {Image and Audio and Video}. To achieve efficient AMMG, our Spider integrates three core components: a Base Model for basic X-to-X (i.e., Any-to-Any) modality processing, a novel Efficient Decoders-Controller for controlling multimodal Decoders to generate Xs (many-modal) contents, and an Any-to-Many Instruction Template designed for producing Xs signal prompts. To train Spider, we constructed a novel Text-formatted Many-Modal (TMM) dataset, which facilitates the learning of the X-to-Xs (i.e., Any-to-Many) capability necessary for AMMG. Ultimately, the well-trained Spider generates a pseudo X-to-Xs dataset, the first-ever X-to-Xs many-modal dataset, enhancing the potential for AMMG task in future research. Overall, this work not only pushes the boundary of multimodal interaction but also provides rich data support for advancing the field. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09439v1-abstract-full').style.display = 'none'; document.getElementById('2411.09439v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09422">arXiv:2411.09422</a> <span> [<a href="https://arxiv.org/pdf/2411.09422">pdf</a>, <a href="https://arxiv.org/format/2411.09422">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> OpenLS-DGF: An Adaptive Open-Source Dataset Generation Framework for Machine Learning Tasks in Logic Synthesis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ni%2C+L">Liwei Ni</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+R">Rui Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+M">Miao Liu</a>, <a href="/search/cs?searchtype=author&query=Meng%2C+X">Xingyu Meng</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+X">Xiaoze Lin</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Junfeng Liu</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+G">Guojie Luo</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+Z">Zhufei Chu</a>, <a href="/search/cs?searchtype=author&query=Qian%2C+W">Weikang Qian</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+X">Xiaoyan Yang</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+B">Biwei Xie</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xingquan Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+H">Huawei Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.09422v2-abstract-short" style="display: inline;"> This paper introduces OpenLS-DGF, an adaptive logic synthesis dataset generation framework, to enhance machine learning~(ML) applications within the logic synthesis process. Previous dataset generation flows were tailored for specific tasks or lacked integrated machine learning capabilities. While OpenLS-DGF supports various machine learning tasks by encapsulating the three fundamental steps of lo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09422v2-abstract-full').style.display = 'inline'; document.getElementById('2411.09422v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.09422v2-abstract-full" style="display: none;"> This paper introduces OpenLS-DGF, an adaptive logic synthesis dataset generation framework, to enhance machine learning~(ML) applications within the logic synthesis process. Previous dataset generation flows were tailored for specific tasks or lacked integrated machine learning capabilities. While OpenLS-DGF supports various machine learning tasks by encapsulating the three fundamental steps of logic synthesis: Boolean representation, logic optimization, and technology mapping. It preserves the original information in both Verilog and machine-learning-friendly GraphML formats. The verilog files offer semi-customizable capabilities, enabling researchers to insert additional steps and incrementally refine the generated dataset. Furthermore, OpenLS-DGF includes an adaptive circuit engine that facilitates the final dataset management and downstream tasks. The generated OpenLS-D-v1 dataset comprises 46 combinational designs from established benchmarks, totaling over 966,000 Boolean circuits. OpenLS-D-v1 supports integrating new data features, making it more versatile for new challenges. This paper demonstrates the versatility of OpenLS-D-v1 through four distinct downstream tasks: circuit classification, circuit ranking, quality of results (QoR) prediction, and probability prediction. Each task is chosen to represent essential steps of logic synthesis, and the experimental results show the generated dataset from OpenLS-DGF achieves prominent diversity and applicability. The source code and datasets are available at https://github.com/Logic-Factory/ACE/blob/master/OpenLS-DGF/readme.md. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09422v2-abstract-full').style.display = 'none'; document.getElementById('2411.09422v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09308">arXiv:2411.09308</a> <span> [<a href="https://arxiv.org/pdf/2411.09308">pdf</a>, <a href="https://arxiv.org/format/2411.09308">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> DT-JRD: Deep Transformer based Just Recognizable Difference Prediction Model for Video Coding for Machines </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+J">Junqi Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yun Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiaoqi Wang</a>, <a href="/search/cs?searchtype=author&query=Long%2C+X">Xu Long</a>, <a href="/search/cs?searchtype=author&query=Kwong%2C+S">Sam Kwong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.09308v1-abstract-short" style="display: inline;"> Just Recognizable Difference (JRD) represents the minimum visual difference that is detectable by machine vision, which can be exploited to promote machine vision oriented visual signal processing. In this paper, we propose a Deep Transformer based JRD (DT-JRD) prediction model for Video Coding for Machines (VCM), where the accurately predicted JRD can be used reduce the coding bit rate while main… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09308v1-abstract-full').style.display = 'inline'; document.getElementById('2411.09308v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.09308v1-abstract-full" style="display: none;"> Just Recognizable Difference (JRD) represents the minimum visual difference that is detectable by machine vision, which can be exploited to promote machine vision oriented visual signal processing. In this paper, we propose a Deep Transformer based JRD (DT-JRD) prediction model for Video Coding for Machines (VCM), where the accurately predicted JRD can be used reduce the coding bit rate while maintaining the accuracy of machine tasks. Firstly, we model the JRD prediction as a multi-class classification and propose a DT-JRD prediction model that integrates an improved embedding, a content and distortion feature extraction, a multi-class classification and a novel learning strategy. Secondly, inspired by the perception property that machine vision exhibits a similar response to distortions near JRD, we propose an asymptotic JRD loss by using Gaussian Distribution-based Soft Labels (GDSL), which significantly extends the number of training labels and relaxes classification boundaries. Finally, we propose a DT-JRD based VCM to reduce the coding bits while maintaining the accuracy of object detection. Extensive experimental results demonstrate that the mean absolute error of the predicted JRD by the DT-JRD is 5.574, outperforming the state-of-the-art JRD prediction model by 13.1%. Coding experiments shows that comparing with the VVC, the DT-JRD based VCM achieves an average of 29.58% bit rate reduction while maintaining the object detection accuracy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09308v1-abstract-full').style.display = 'none'; document.getElementById('2411.09308v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to IEEE Transactions on Multimedia</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09289">arXiv:2411.09289</a> <span> [<a href="https://arxiv.org/pdf/2411.09289">pdf</a>, <a href="https://arxiv.org/format/2411.09289">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> StreamAdapter: Efficient Test Time Adaptation from Contextual Streams </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Muhtar%2C+D">Dilxat Muhtar</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+Y">Yelong Shen</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yaming Yang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xiaodong Liu</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+Y">Yadong Lu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jianfeng Liu</a>, <a href="/search/cs?searchtype=author&query=Zhan%2C+Y">Yuefeng Zhan</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+H">Hao Sun</a>, <a href="/search/cs?searchtype=author&query=Deng%2C+W">Weiwei Deng</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+F">Feng Sun</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xueliang Zhang</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+J">Jianfeng Gao</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+W">Weizhu Chen</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Q">Qi Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.09289v1-abstract-short" style="display: inline;"> In-context learning (ICL) allows large language models (LLMs) to adapt to new tasks directly from the given demonstrations without requiring gradient updates. While recent advances have expanded context windows to accommodate more demonstrations, this approach increases inference costs without necessarily improving performance. To mitigate these issues, We propose StreamAdapter, a novel approach t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09289v1-abstract-full').style.display = 'inline'; document.getElementById('2411.09289v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.09289v1-abstract-full" style="display: none;"> In-context learning (ICL) allows large language models (LLMs) to adapt to new tasks directly from the given demonstrations without requiring gradient updates. While recent advances have expanded context windows to accommodate more demonstrations, this approach increases inference costs without necessarily improving performance. To mitigate these issues, We propose StreamAdapter, a novel approach that directly updates model parameters from context at test time, eliminating the need for explicit in-context demonstrations. StreamAdapter employs context mapping and weight absorption mechanisms to dynamically transform ICL demonstrations into parameter updates with minimal additional parameters. By reducing reliance on numerous in-context examples, StreamAdapter significantly reduce inference costs and allows for efficient inference with constant time complexity, regardless of demonstration count. Extensive experiments across diverse tasks and model architectures demonstrate that StreamAdapter achieves comparable or superior adaptation capability to ICL while requiring significantly fewer demonstrations. The superior task adaptation and context encoding capabilities of StreamAdapter on both language understanding and generation tasks provides a new perspective for adapting LLMs at test time using context, allowing for more efficient adaptation across scenarios and more cost-effective inference <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09289v1-abstract-full').style.display = 'none'; document.getElementById('2411.09289v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">22 Pages, 9 Figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09242">arXiv:2411.09242</a> <span> [<a href="https://arxiv.org/pdf/2411.09242">pdf</a>, <a href="https://arxiv.org/format/2411.09242">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> FluidML: Fast and Memory Efficient Inference Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jinjie Liu</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+H">Hang Qiu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.09242v1-abstract-short" style="display: inline;"> Machine learning models deployed on edge devices have enabled numerous exciting new applications, such as humanoid robots, AR glasses, and autonomous vehicles. However, the computing resources available on these edge devices are not catching up with the ever-growing number of parameters in these models. As the models become bigger and more complicated, the novel yet sophisticated structure challen… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09242v1-abstract-full').style.display = 'inline'; document.getElementById('2411.09242v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.09242v1-abstract-full" style="display: none;"> Machine learning models deployed on edge devices have enabled numerous exciting new applications, such as humanoid robots, AR glasses, and autonomous vehicles. However, the computing resources available on these edge devices are not catching up with the ever-growing number of parameters in these models. As the models become bigger and more complicated, the novel yet sophisticated structure challenges the inference runtime optimization. We present FluidML, a generic runtime memory management and optimization framework that can flexibly transform the model execution blueprint to achieve faster and more memory-efficient inference. Evaluations across different platforms show that FluidML can consistently reduce the end-to-end inference latency by up to 25.38% for popular language models and reduce peak memory usage by up to 41.47%, compared to state-of-the-art approaches. FluidML is of ~30K line of codes, built for general-purpose usage, and will be released as an open-source inference runtime optimization framework to the community. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09242v1-abstract-full').style.display = 'none'; document.getElementById('2411.09242v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09231">arXiv:2411.09231</a> <span> [<a href="https://arxiv.org/pdf/2411.09231">pdf</a>, <a href="https://arxiv.org/format/2411.09231">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> AEAKA: An Adaptive and Efficient Authentication and Key Agreement Scheme for IoT in Cloud-Edge-Device Collaborative Environments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+K">Kexian Liu</a>, <a href="/search/cs?searchtype=author&query=Guan%2C+J">Jianfeng Guan</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+X">Xiaolong Hu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jing Zhang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jianli Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Hongke Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.09231v1-abstract-short" style="display: inline;"> To meet the diverse needs of users, the rapid advancement of cloud-edge-device collaboration has become a standard practice. However, this complex environment, particularly in untrusted (non-collaborative) scenarios, presents numerous security challenges. Authentication acts as the first line of defense and is fundamental to addressing these issues. Although many authentication and key agreement s… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09231v1-abstract-full').style.display = 'inline'; document.getElementById('2411.09231v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.09231v1-abstract-full" style="display: none;"> To meet the diverse needs of users, the rapid advancement of cloud-edge-device collaboration has become a standard practice. However, this complex environment, particularly in untrusted (non-collaborative) scenarios, presents numerous security challenges. Authentication acts as the first line of defense and is fundamental to addressing these issues. Although many authentication and key agreement schemes exist, they often face limitations, such as being tailored to overly specific scenarios where devices authenticate solely with either the edge or the cloud, or being unsuitable for resource-constrained devices. To address these challenges, we propose an adaptive and efficient authentication and key agreement scheme (AEAKA) for Cloud-Edge-Device IoT environments. This scheme is highly adaptive and scalable, capable of automatically and dynamically initiating different authentication methods based on device requirements. Additionally, it employs an edge-assisted authentication approach to reduce the load on third-party trust authorities. Furthermore, we introduce a hash-based algorithm for the authentication protocol, ensuring a lightweight method suitable for a wide range of resource-constrained devices while maintaining security. AEAKA ensures that entities use associated authentication credentials, enhancing the privacy of the authentication process. Security proofs and performance analyses demonstrate that AEAKA outperforms other methods in terms of security and authentication efficiency. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09231v1-abstract-full').style.display = 'none'; document.getElementById('2411.09231v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages,14 figures,submitted to Transactions on Dependable and Secure Computing in 30-May-2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09229">arXiv:2411.09229</a> <span> [<a href="https://arxiv.org/pdf/2411.09229">pdf</a>, <a href="https://arxiv.org/format/2411.09229">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Efficient and Secure Cross-Domain Data-Sharing for Resource-Constrained Internet of Things </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+K">Kexian Liu</a>, <a href="/search/cs?searchtype=author&query=Guan%2C+J">Jianfeng Guan</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+X">Xiaolong Hu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jianli Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Hongke Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.09229v1-abstract-short" style="display: inline;"> The growing complexity of Internet of Things (IoT) environments, particularly in cross-domain data sharing, presents significant security challenges. Existing data-sharing schemes often rely on computationally expensive cryptographic operations and centralized key management, limiting their effectiveness for resource-constrained devices. To address these issues, we propose an efficient, secure blo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09229v1-abstract-full').style.display = 'inline'; document.getElementById('2411.09229v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.09229v1-abstract-full" style="display: none;"> The growing complexity of Internet of Things (IoT) environments, particularly in cross-domain data sharing, presents significant security challenges. Existing data-sharing schemes often rely on computationally expensive cryptographic operations and centralized key management, limiting their effectiveness for resource-constrained devices. To address these issues, we propose an efficient, secure blockchain-based data-sharing scheme. First, our scheme adopts a distributed key generation method, which avoids single point of failure. This method also allows independent pseudonym generation and key updates, enhancing authentication flexibility while reducing computational overhead. Additionally, the scheme provides a complete data-sharing process, covering data uploading, storage, and sharing, while ensuring data traceability, integrity, and privacy. Security analysis shows that the proposed scheme is theoretically secure and resistant to various attacks, while performance evaluations demonstrate lower computational and communication overhead compared to existing solutions, making it both secure and efficient for IoT applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09229v1-abstract-full').style.display = 'none'; document.getElementById('2411.09229v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages,10 figures, submitted to Transactions on Information Forensics & Security in 19-Sep-2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.08592">arXiv:2411.08592</a> <span> [<a href="https://arxiv.org/pdf/2411.08592">pdf</a>, <a href="https://arxiv.org/format/2411.08592">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Slender Object Scene Segmentation in Remote Sensing Image Based on Learnable Morphological Skeleton with Segment Anything Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xie%2C+J">Jun Xie</a>, <a href="/search/cs?searchtype=author&query=Li%2C+W">Wenxiao Li</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+F">Faqiang Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Liqiang Zhang</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+Z">Zhengyang Hou</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jun Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.08592v1-abstract-short" style="display: inline;"> Morphological methods play a crucial role in remote sensing image processing, due to their ability to capture and preserve small structural details. However, most of the existing deep learning models for semantic segmentation are based on the encoder-decoder architecture including U-net and Segment Anything Model (SAM), where the downsampling process tends to discard fine details. In this paper, w… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08592v1-abstract-full').style.display = 'inline'; document.getElementById('2411.08592v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.08592v1-abstract-full" style="display: none;"> Morphological methods play a crucial role in remote sensing image processing, due to their ability to capture and preserve small structural details. However, most of the existing deep learning models for semantic segmentation are based on the encoder-decoder architecture including U-net and Segment Anything Model (SAM), where the downsampling process tends to discard fine details. In this paper, we propose a new approach that integrates learnable morphological skeleton prior into deep neural networks using the variational method. To address the difficulty in backpropagation in neural networks caused by the non-differentiability presented in classical morphological operations, we provide a smooth representation of the morphological skeleton and design a variational segmentation model integrating morphological skeleton prior by employing operator splitting and dual methods. Then, we integrate this model into the network architecture of SAM, which is achieved by adding a token to mask decoder and modifying the final sigmoid layer, ensuring the final segmentation results preserve the skeleton structure as much as possible. Experimental results on remote sensing datasets, including buildings and roads, demonstrate that our method outperforms the original SAM on slender object segmentation and exhibits better generalization capability. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08592v1-abstract-full').style.display = 'none'; document.getElementById('2411.08592v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.08545">arXiv:2411.08545</a> <span> [<a href="https://arxiv.org/pdf/2411.08545">pdf</a>, <a href="https://arxiv.org/format/2411.08545">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> APDDv2: Aesthetics of Paintings and Drawings Dataset with Artist Labeled Scores and Comments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jin%2C+X">Xin Jin</a>, <a href="/search/cs?searchtype=author&query=Qiao%2C+Q">Qianqian Qiao</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+Y">Yi Lu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Huaye Wang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+H">Heng Huang</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+S">Shan Gao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jianfei Liu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+R">Rui Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.08545v1-abstract-short" style="display: inline;"> Datasets play a pivotal role in training visual models, facilitating the development of abstract understandings of visual features through diverse image samples and multidimensional attributes. However, in the realm of aesthetic evaluation of artistic images, datasets remain relatively scarce. Existing painting datasets are often characterized by limited scoring dimensions and insufficient annotat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08545v1-abstract-full').style.display = 'inline'; document.getElementById('2411.08545v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.08545v1-abstract-full" style="display: none;"> Datasets play a pivotal role in training visual models, facilitating the development of abstract understandings of visual features through diverse image samples and multidimensional attributes. However, in the realm of aesthetic evaluation of artistic images, datasets remain relatively scarce. Existing painting datasets are often characterized by limited scoring dimensions and insufficient annotations, thereby constraining the advancement and application of automatic aesthetic evaluation methods in the domain of painting. To bridge this gap, we introduce the Aesthetics Paintings and Drawings Dataset (APDD), the first comprehensive collection of paintings encompassing 24 distinct artistic categories and 10 aesthetic attributes. Building upon the initial release of APDDv1, our ongoing research has identified opportunities for enhancement in data scale and annotation precision. Consequently, APDDv2 boasts an expanded image corpus and improved annotation quality, featuring detailed language comments to better cater to the needs of both researchers and practitioners seeking high-quality painting datasets. Furthermore, we present an updated version of the Art Assessment Network for Specific Painting Styles, denoted as ArtCLIP. Experimental validation demonstrates the superior performance of this revised model in the realm of aesthetic evaluation, surpassing its predecessor in accuracy and efficacy. The dataset and model are available at https://github.com/BestiVictory/APDDv2.git. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08545v1-abstract-full').style.display = 'none'; document.getElementById('2411.08545v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.08504">arXiv:2411.08504</a> <span> [<a href="https://arxiv.org/pdf/2411.08504">pdf</a>, <a href="https://arxiv.org/format/2411.08504">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Towards Objective and Unbiased Decision Assessments with LLM-Enhanced Hierarchical Attention Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+J">Junhua Liu</a>, <a href="/search/cs?searchtype=author&query=Lim%2C+K+H">Kwan Hui Lim</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+R+K">Roy Ka-Wei Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.08504v2-abstract-short" style="display: inline;"> How objective and unbiased are we while making decisions? This work investigates cognitive bias identification in high-stake decision making process by human experts, questioning its effectiveness in real-world settings, such as candidates assessments for university admission. We begin with a statistical analysis assessing correlations among different decision points among in the current process,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08504v2-abstract-full').style.display = 'inline'; document.getElementById('2411.08504v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.08504v2-abstract-full" style="display: none;"> How objective and unbiased are we while making decisions? This work investigates cognitive bias identification in high-stake decision making process by human experts, questioning its effectiveness in real-world settings, such as candidates assessments for university admission. We begin with a statistical analysis assessing correlations among different decision points among in the current process, which discovers discrepancies that imply cognitive bias and inconsistency in decisions. This motivates our exploration of bias-aware AI-augmented workflow that surpass human judgment. We propose BGM-HAN, an enhanced Hierarchical Attention Network with Byte-Pair Encoding, Gated Residual Connections and Multi-Head Attention. Using it as a backbone model, we further propose a Shortlist-Analyse-Recommend (SAR) agentic workflow, which simulate real-world decision-making. In our experiments, both the proposed model and the agentic workflow significantly improves on both human judgment and alternative models, validated with real-world data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08504v2-abstract-full').style.display = 'none'; document.getElementById('2411.08504v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Source code is available at: https://github.com/junhua/bgm-han</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.08488">arXiv:2411.08488</a> <span> [<a href="https://arxiv.org/pdf/2411.08488">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> UNSCT-HRNet: Modeling Anatomical Uncertainty for Landmark Detection in Total Hip Arthroplasty </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wan%2C+J">Jiaxin Wan</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+L">Lin Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Haoran Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+L">Liangwei Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+W">Wei Li</a>, <a href="/search/cs?searchtype=author&query=Kou%2C+S">Shuheng Kou</a>, <a href="/search/cs?searchtype=author&query=Li%2C+R">Runtian Li</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+J">Jiayi Tang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Juanxiu Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jing Zhang</a>, <a href="/search/cs?searchtype=author&query=Du%2C+X">Xiaohui Du</a>, <a href="/search/cs?searchtype=author&query=Hao%2C+R">Ruqian Hao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.08488v1-abstract-short" style="display: inline;"> Total hip arthroplasty (THA) relies on accurate landmark detection from radiographic images, but unstructured data caused by irregular patient postures or occluded anatomical markers pose significant challenges for existing methods. To address this, we propose UNSCT-HRNet (Unstructured CT - High-Resolution Net), a deep learning-based framework that integrates a Spatial Relationship Fusion (SRF) mo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08488v1-abstract-full').style.display = 'inline'; document.getElementById('2411.08488v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.08488v1-abstract-full" style="display: none;"> Total hip arthroplasty (THA) relies on accurate landmark detection from radiographic images, but unstructured data caused by irregular patient postures or occluded anatomical markers pose significant challenges for existing methods. To address this, we propose UNSCT-HRNet (Unstructured CT - High-Resolution Net), a deep learning-based framework that integrates a Spatial Relationship Fusion (SRF) module and an Uncertainty Estimation (UE) module. The SRF module, utilizing coordinate convolution and polarized attention, enhances the model's ability to capture complex spatial relationships. Meanwhile, the UE module which based on entropy ensures predictions are anatomically relevant. For unstructured data, the proposed method can predict landmarks without relying on the fixed number of points, which shows higher accuracy and better robustness comparing with the existing methods. Our UNSCT-HRNet demonstrates over a 60% improvement across multiple metrics in unstructured data. The experimental results also reveal that our approach maintains good performance on the structured dataset. Overall, the proposed UNSCT-HRNet has the potential to be used as a new reliable, automated solution for THA surgical planning and postoperative monitoring. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08488v1-abstract-full').style.display = 'none'; document.getElementById('2411.08488v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.08433">arXiv:2411.08433</a> <span> [<a href="https://arxiv.org/pdf/2411.08433">pdf</a>, <a href="https://arxiv.org/format/2411.08433">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> 3D Multi-Object Tracking with Semi-Supervised GRU-Kalman Filter </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiaoxiang Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jiaxin Liu</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+M">Miaojie Feng</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhaoxing Zhang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+X">Xin Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.08433v1-abstract-short" style="display: inline;"> 3D Multi-Object Tracking (MOT), a fundamental component of environmental perception, is essential for intelligent systems like autonomous driving and robotic sensing. Although Tracking-by-Detection frameworks have demonstrated excellent performance in recent years, their application in real-world scenarios faces significant challenges. Object movement in complex environments is often highly nonlin… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08433v1-abstract-full').style.display = 'inline'; document.getElementById('2411.08433v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.08433v1-abstract-full" style="display: none;"> 3D Multi-Object Tracking (MOT), a fundamental component of environmental perception, is essential for intelligent systems like autonomous driving and robotic sensing. Although Tracking-by-Detection frameworks have demonstrated excellent performance in recent years, their application in real-world scenarios faces significant challenges. Object movement in complex environments is often highly nonlinear, while existing methods typically rely on linear approximations of motion. Furthermore, system noise is frequently modeled as a Gaussian distribution, which fails to capture the true complexity of the noise dynamics. These oversimplified modeling assumptions can lead to significant reductions in tracking precision. To address this, we propose a GRU-based MOT method, which introduces a learnable Kalman filter into the motion module. This approach is able to learn object motion characteristics through data-driven learning, thereby avoiding the need for manual model design and model error. At the same time, to avoid abnormal supervision caused by the wrong association between annotations and trajectories, we design a semi-supervised learning strategy to accelerate the convergence speed and improve the robustness of the model. Evaluation experiment on the nuScenes and Argoverse2 datasets demonstrates that our system exhibits superior performance and significant potential compared to traditional TBD methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08433v1-abstract-full').style.display = 'none'; document.getElementById('2411.08433v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.08348">arXiv:2411.08348</a> <span> [<a href="https://arxiv.org/pdf/2411.08348">pdf</a>, <a href="https://arxiv.org/format/2411.08348">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Refining Translations with LLMs: A Constraint-Aware Iterative Prompting Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+S">Shangfeng Chen</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xiayang Shi</a>, <a href="/search/cs?searchtype=author&query=Li%2C+P">Pu Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yinlin Li</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jingjing Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.08348v1-abstract-short" style="display: inline;"> Large language models (LLMs) have demonstrated remarkable proficiency in machine translation (MT), even without specific training on the languages in question. However, translating rare words in low-resource or domain-specific contexts remains challenging for LLMs. To address this issue, we propose a multi-step prompt chain that enhances translation faithfulness by prioritizing key terms crucial f… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08348v1-abstract-full').style.display = 'inline'; document.getElementById('2411.08348v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.08348v1-abstract-full" style="display: none;"> Large language models (LLMs) have demonstrated remarkable proficiency in machine translation (MT), even without specific training on the languages in question. However, translating rare words in low-resource or domain-specific contexts remains challenging for LLMs. To address this issue, we propose a multi-step prompt chain that enhances translation faithfulness by prioritizing key terms crucial for semantic accuracy. Our method first identifies these keywords and retrieves their translations from a bilingual dictionary, integrating them into the LLM's context using Retrieval-Augmented Generation (RAG). We further mitigate potential output hallucinations caused by long prompts through an iterative self-checking mechanism, where the LLM refines its translations based on lexical and semantic constraints. Experiments using Llama and Qwen as base models on the FLORES-200 and WMT datasets demonstrate significant improvements over baselines, highlighting the effectiveness of our approach in enhancing translation faithfulness and robustness, particularly in low-resource scenarios. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08348v1-abstract-full').style.display = 'none'; document.getElementById('2411.08348v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.07975">arXiv:2411.07975</a> <span> [<a href="https://arxiv.org/pdf/2411.07975">pdf</a>, <a href="https://arxiv.org/format/2411.07975">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> JanusFlow: Harmonizing Autoregression and Rectified Flow for Unified Multimodal Understanding and Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ma%2C+Y">Yiyang Ma</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xingchao Liu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xiaokang Chen</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+W">Wen Liu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+C">Chengyue Wu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z">Zhiyu Wu</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+Z">Zizheng Pan</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+Z">Zhenda Xie</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Haowei Zhang</a>, <a href="/search/cs?searchtype=author&query=yu%2C+X">Xingkai yu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+L">Liang Zhao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yisong Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jiaying Liu</a>, <a href="/search/cs?searchtype=author&query=Ruan%2C+C">Chong Ruan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.07975v1-abstract-short" style="display: inline;"> We present JanusFlow, a powerful framework that unifies image understanding and generation in a single model. JanusFlow introduces a minimalist architecture that integrates autoregressive language models with rectified flow, a state-of-the-art method in generative modeling. Our key finding demonstrates that rectified flow can be straightforwardly trained within the large language model framework,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07975v1-abstract-full').style.display = 'inline'; document.getElementById('2411.07975v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.07975v1-abstract-full" style="display: none;"> We present JanusFlow, a powerful framework that unifies image understanding and generation in a single model. JanusFlow introduces a minimalist architecture that integrates autoregressive language models with rectified flow, a state-of-the-art method in generative modeling. Our key finding demonstrates that rectified flow can be straightforwardly trained within the large language model framework, eliminating the need for complex architectural modifications. To further improve the performance of our unified model, we adopt two key strategies: (i) decoupling the understanding and generation encoders, and (ii) aligning their representations during unified training. Extensive experiments show that JanusFlow achieves comparable or superior performance to specialized models in their respective domains, while significantly outperforming existing unified approaches across standard benchmarks. This work represents a step toward more efficient and versatile vision-language models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07975v1-abstract-full').style.display = 'none'; document.getElementById('2411.07975v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.07641">arXiv:2411.07641</a> <span> [<a href="https://arxiv.org/pdf/2411.07641">pdf</a>, <a href="https://arxiv.org/format/2411.07641">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Top-$n蟽$: Not All Logits Are You Need </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tang%2C+C">Chenxia Tang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jianchun Liu</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+H">Hongli Xu</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+L">Liusheng Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.07641v1-abstract-short" style="display: inline;"> Large language models (LLMs) typically employ greedy decoding or low-temperature sampling for reasoning tasks, reflecting a perceived trade-off between diversity and accuracy. We challenge this convention by introducing top-$n蟽$, a novel sampling method that operates directly on pre-softmax logits by leveraging a statistical threshold. Our key insight is that logits naturally separate into a Gauss… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07641v1-abstract-full').style.display = 'inline'; document.getElementById('2411.07641v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.07641v1-abstract-full" style="display: none;"> Large language models (LLMs) typically employ greedy decoding or low-temperature sampling for reasoning tasks, reflecting a perceived trade-off between diversity and accuracy. We challenge this convention by introducing top-$n蟽$, a novel sampling method that operates directly on pre-softmax logits by leveraging a statistical threshold. Our key insight is that logits naturally separate into a Gaussian-distributed noisy region and a distinct informative region, enabling efficient token filtering without complex probability manipulations. Unlike existing methods (e.g., top-$p$, min-$p$) that inadvertently include more noise tokens at higher temperatures, top-$n蟽$ maintains a stable sampling space regardless of temperature scaling. We also provide a theoretical analysis of top-$n蟽$ to better understand its behavior. The extensive experimental results across four reasoning-focused datasets demonstrate that our method not only outperforms existing sampling approaches but also surpasses greedy decoding, while maintaining consistent performance even at high temperatures. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07641v1-abstract-full').style.display = 'none'; document.getElementById('2411.07641v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.07527">arXiv:2411.07527</a> <span> [<a href="https://arxiv.org/pdf/2411.07527">pdf</a>, <a href="https://arxiv.org/format/2411.07527">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.24963/ijcai.2024/707">10.24963/ijcai.2024/707 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Prompt-enhanced Network for Hateful Meme Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+J">Junxi Liu</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+Y">Yanyan Feng</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jiehai Chen</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+Y">Yun Xue</a>, <a href="/search/cs?searchtype=author&query=Li%2C+F">Fenghuan Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.07527v1-abstract-short" style="display: inline;"> The dynamic expansion of social media has led to an inundation of hateful memes on media platforms, accentuating the growing need for efficient identification and removal. Acknowledging the constraints of conventional multimodal hateful meme classification, which heavily depends on external knowledge and poses the risk of including irrelevant or redundant content, we developed Pen -- a prompt-enha… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07527v1-abstract-full').style.display = 'inline'; document.getElementById('2411.07527v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.07527v1-abstract-full" style="display: none;"> The dynamic expansion of social media has led to an inundation of hateful memes on media platforms, accentuating the growing need for efficient identification and removal. Acknowledging the constraints of conventional multimodal hateful meme classification, which heavily depends on external knowledge and poses the risk of including irrelevant or redundant content, we developed Pen -- a prompt-enhanced network framework based on the prompt learning approach. Specifically, after constructing the sequence through the prompt method and encoding it with a language model, we performed region information global extraction on the encoded sequence for multi-view perception. By capturing global information about inference instances and demonstrations, Pen facilitates category selection by fully leveraging sequence information. This approach significantly improves model classification accuracy. Additionally, to bolster the model's reasoning capabilities in the feature space, we introduced prompt-aware contrastive learning into the framework to improve the quality of sample feature distributions. Through extensive ablation experiments on two public datasets, we evaluate the effectiveness of the Pen framework, concurrently comparing it with state-of-the-art model baselines. Our research findings highlight that Pen surpasses manual prompt methods, showcasing superior generalization and classification accuracy in hateful meme classification tasks. Our code is available at https://github.com/juszzi/Pen. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07527v1-abstract-full').style.display = 'none'; document.getElementById('2411.07527v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published in Proceedings of the Thirty-Third International Joint Conference on Artificial Intelligence Main Track. Pages 6397-6405</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.07267">arXiv:2411.07267</a> <span> [<a href="https://arxiv.org/pdf/2411.07267">pdf</a>, <a href="https://arxiv.org/format/2411.07267">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> </div> </div> <p class="title is-5 mathjax"> A Survey on Data Markets </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jiayao Zhang</a>, <a href="/search/cs?searchtype=author&query=Bi%2C+Y">Yuran Bi</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+M">Mengye Cheng</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jinfei Liu</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+K">Kui Ren</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Q">Qiheng Sun</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yihang Wu</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+Y">Yang Cao</a>, <a href="/search/cs?searchtype=author&query=Fernandez%2C+R+C">Raul Castro Fernandez</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+H">Haifeng Xu</a>, <a href="/search/cs?searchtype=author&query=Jia%2C+R">Ruoxi Jia</a>, <a href="/search/cs?searchtype=author&query=Kwon%2C+Y">Yongchan Kwon</a>, <a href="/search/cs?searchtype=author&query=Pei%2C+J">Jian Pei</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J+T">Jiachen T. Wang</a>, <a href="/search/cs?searchtype=author&query=Xia%2C+H">Haocheng Xia</a>, <a href="/search/cs?searchtype=author&query=Xiong%2C+L">Li Xiong</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+X">Xiaohui Yu</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+J">James Zou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.07267v1-abstract-short" style="display: inline;"> Data is the new oil of the 21st century. The growing trend of trading data for greater welfare has led to the emergence of data markets. A data market is any mechanism whereby the exchange of data products including datasets and data derivatives takes place as a result of data buyers and data sellers being in contact with one another, either directly or through mediating agents. It serves as a coo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07267v1-abstract-full').style.display = 'inline'; document.getElementById('2411.07267v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.07267v1-abstract-full" style="display: none;"> Data is the new oil of the 21st century. The growing trend of trading data for greater welfare has led to the emergence of data markets. A data market is any mechanism whereby the exchange of data products including datasets and data derivatives takes place as a result of data buyers and data sellers being in contact with one another, either directly or through mediating agents. It serves as a coordinating mechanism by which several functions, including the pricing and the distribution of data as the most important ones, interact to make the value of data fully exploited and enhanced. In this article, we present a comprehensive survey of this important and emerging direction from the aspects of data search, data productization, data transaction, data pricing, revenue allocation as well as privacy, security, and trust issues. We also investigate the government policies and industry status of data markets across different countries and different domains. Finally, we identify the unresolved challenges and discuss possible future directions for the development of data markets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07267v1-abstract-full').style.display = 'none'; document.getElementById('2411.07267v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.07140">arXiv:2411.07140</a> <span> [<a href="https://arxiv.org/pdf/2411.07140">pdf</a>, <a href="https://arxiv.org/format/2411.07140">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Chinese SimpleQA: A Chinese Factuality Evaluation for Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=He%2C+Y">Yancheng He</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S">Shilong Li</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jiaheng Liu</a>, <a href="/search/cs?searchtype=author&query=Tan%2C+Y">Yingshui Tan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+W">Weixun Wang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+H">Hui Huang</a>, <a href="/search/cs?searchtype=author&query=Bu%2C+X">Xingyuan Bu</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+H">Hangyu Guo</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+C">Chengwei Hu</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+B">Boren Zheng</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+Z">Zhuoran Lin</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xuepeng Liu</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+D">Dekai Sun</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+S">Shirong Lin</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Z">Zhicheng Zheng</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+X">Xiaoyong Zhu</a>, <a href="/search/cs?searchtype=author&query=Su%2C+W">Wenbo Su</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+B">Bo Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.07140v2-abstract-short" style="display: inline;"> New LLM evaluation benchmarks are important to align with the rapid development of Large Language Models (LLMs). In this work, we present Chinese SimpleQA, the first comprehensive Chinese benchmark to evaluate the factuality ability of language models to answer short questions, and Chinese SimpleQA mainly has five properties (i.e., Chinese, Diverse, High-quality, Static, Easy-to-evaluate). Specifi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07140v2-abstract-full').style.display = 'inline'; document.getElementById('2411.07140v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.07140v2-abstract-full" style="display: none;"> New LLM evaluation benchmarks are important to align with the rapid development of Large Language Models (LLMs). In this work, we present Chinese SimpleQA, the first comprehensive Chinese benchmark to evaluate the factuality ability of language models to answer short questions, and Chinese SimpleQA mainly has five properties (i.e., Chinese, Diverse, High-quality, Static, Easy-to-evaluate). Specifically, first, we focus on the Chinese language over 6 major topics with 99 diverse subtopics. Second, we conduct a comprehensive quality control process to achieve high-quality questions and answers, where the reference answers are static and cannot be changed over time. Third, following SimpleQA, the questions and answers are very short, and the grading process is easy-to-evaluate based on OpenAI API. Based on Chinese SimpleQA, we perform a comprehensive evaluation on the factuality abilities of existing LLMs. Finally, we hope that Chinese SimpleQA could guide the developers to better understand the Chinese factuality abilities of their models and facilitate the growth of foundation models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07140v2-abstract-full').style.display = 'none'; document.getElementById('2411.07140v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.07087">arXiv:2411.07087</a> <span> [<a href="https://arxiv.org/pdf/2411.07087">pdf</a>, <a href="https://arxiv.org/format/2411.07087">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> OCMDP: Observation-Constrained Markov Decision Process </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+T">Taiyi Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jianheng Liu</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+B">Bryan Lee</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z">Zhihao Wu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yu Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.07087v2-abstract-short" style="display: inline;"> In many practical applications, decision-making processes must balance the costs of acquiring information with the benefits it provides. Traditional control systems often assume full observability, an unrealistic assumption when observations are expensive. We tackle the challenge of simultaneously learning observation and control strategies in such cost-sensitive environments by introducing the Ob… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07087v2-abstract-full').style.display = 'inline'; document.getElementById('2411.07087v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.07087v2-abstract-full" style="display: none;"> In many practical applications, decision-making processes must balance the costs of acquiring information with the benefits it provides. Traditional control systems often assume full observability, an unrealistic assumption when observations are expensive. We tackle the challenge of simultaneously learning observation and control strategies in such cost-sensitive environments by introducing the Observation-Constrained Markov Decision Process (OCMDP), where the policy influences the observability of the true state. To manage the complexity arising from the combined observation and control actions, we develop an iterative, model-free deep reinforcement learning algorithm that separates the sensing and control components of the policy. This decomposition enables efficient learning in the expanded action space by focusing on when and what to observe, as well as determining optimal control actions, without requiring knowledge of the environment's dynamics. We validate our approach on a simulated diagnostic task and a realistic healthcare environment using HeartPole. Given both scenarios, the experimental results demonstrate that our model achieves a substantial reduction in observation costs on average, significantly outperforming baseline methods by a notable margin in efficiency. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07087v2-abstract-full').style.display = 'none'; document.getElementById('2411.07087v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Full paper, 14 Pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.07025">arXiv:2411.07025</a> <span> [<a href="https://arxiv.org/pdf/2411.07025">pdf</a>, <a href="https://arxiv.org/format/2411.07025">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Scaling Mesh Generation via Compressive Tokenization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Weng%2C+H">Haohan Weng</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Z">Zibo Zhao</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+B">Biwen Lei</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+X">Xianghui Yang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jian Liu</a>, <a href="/search/cs?searchtype=author&query=Lai%2C+Z">Zeqiang Lai</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zhuo Chen</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yuhong Liu</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+J">Jie Jiang</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+C">Chunchao Guo</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+T">Tong Zhang</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+S">Shenghua Gao</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+C+L+P">C. L. Philip Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.07025v1-abstract-short" style="display: inline;"> We propose a compressive yet effective mesh representation, Blocked and Patchified Tokenization (BPT), facilitating the generation of meshes exceeding 8k faces. BPT compresses mesh sequences by employing block-wise indexing and patch aggregation, reducing their length by approximately 75\% compared to the original sequences. This compression milestone unlocks the potential to utilize mesh data wit… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07025v1-abstract-full').style.display = 'inline'; document.getElementById('2411.07025v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.07025v1-abstract-full" style="display: none;"> We propose a compressive yet effective mesh representation, Blocked and Patchified Tokenization (BPT), facilitating the generation of meshes exceeding 8k faces. BPT compresses mesh sequences by employing block-wise indexing and patch aggregation, reducing their length by approximately 75\% compared to the original sequences. This compression milestone unlocks the potential to utilize mesh data with significantly more faces, thereby enhancing detail richness and improving generation robustness. Empowered with the BPT, we have built a foundation mesh generative model training on scaled mesh data to support flexible control for point clouds and images. Our model demonstrates the capability to generate meshes with intricate details and accurate topology, achieving SoTA performance on mesh generation and reaching the level for direct product usage. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07025v1-abstract-full').style.display = 'none'; document.getElementById('2411.07025v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Homepage: https://whaohan.github.io/bpt , Code: https://github.com/whaohan/bpt</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06857">arXiv:2411.06857</a> <span> [<a href="https://arxiv.org/pdf/2411.06857">pdf</a>, <a href="https://arxiv.org/ps/2411.06857">ps</a>, <a href="https://arxiv.org/format/2411.06857">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Discrete Mathematics">cs.DM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Probability">math.PR</span> </div> </div> <p class="title is-5 mathjax"> Phase Transitions via Complex Extensions of Markov Chains </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jingcheng Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Chunyang Wang</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+Y">Yitong Yin</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+Y">Yixiao Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06857v1-abstract-short" style="display: inline;"> We study algebraic properties of partition functions, particularly the location of zeros, through the lens of rapidly mixing Markov chains. The classical Lee-Yang program initiated the study of phase transitions via locating complex zeros of partition functions. Markov chains, besides serving as algorithms, have also been used to model physical processes tending to equilibrium. In many scenarios,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06857v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06857v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06857v1-abstract-full" style="display: none;"> We study algebraic properties of partition functions, particularly the location of zeros, through the lens of rapidly mixing Markov chains. The classical Lee-Yang program initiated the study of phase transitions via locating complex zeros of partition functions. Markov chains, besides serving as algorithms, have also been used to model physical processes tending to equilibrium. In many scenarios, rapid mixing of Markov chains coincides with the absence of phase transitions (complex zeros). Prior works have shown that the absence of phase transitions implies rapid mixing of Markov chains. We reveal a converse connection by lifting probabilistic tools for the analysis of Markov chains to study complex zeros of partition functions. Our motivating example is the independence polynomial on $k$-uniform hypergraphs, where the best-known zero-free regime has been significantly lagging behind the regime where we have rapidly mixing Markov chains for the underlying hypergraph independent sets. Specifically, the Glauber dynamics is known to mix rapidly on independent sets in a $k$-uniform hypergraph of maximum degree $螖$ provided that $螖\lesssim 2^{k/2}$. On the other hand, the best-known zero-freeness around the point $1$ of the independence polynomial on $k$-uniform hypergraphs requires $螖\le 5$, the same bound as on a graph. By introducing a complex extension of Markov chains, we lift an existing percolation argument to the complex plane, and show that if $螖\lesssim 2^{k/2}$, the Markov chain converges in a complex neighborhood, and the independence polynomial itself does not vanish in the same neighborhood. In the same regime, our result also implies central limit theorems for the size of a uniformly random independent set, and deterministic approximation algorithms for the number of hypergraph independent sets of size $k \le 伪n$ for some constant $伪$. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06857v1-abstract-full').style.display = 'none'; document.getElementById('2411.06857v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">37 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06796">arXiv:2411.06796</a> <span> [<a href="https://arxiv.org/pdf/2411.06796">pdf</a>, <a href="https://arxiv.org/format/2411.06796">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Automatically Write Code Checker: An LLM-based Approach with Logic-guided API Retrieval and Case by Case Iteration </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xie%2C+Y">Yuanyuan Xie</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jun Liu</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+J">Jiwei Yan</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+J">Jinhao Huang</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+J">Jun Yan</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jian Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06796v1-abstract-short" style="display: inline;"> With the rising demand for code quality assurance, developers are not only utilizing existing static code checkers but also seeking custom checkers to satisfy their specific needs. Nowadays, various code-checking frameworks provide extensive checker customization interfaces to meet this need. However, both the abstract checking logic as well as the complex API usage of large-scale frameworks make… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06796v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06796v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06796v1-abstract-full" style="display: none;"> With the rising demand for code quality assurance, developers are not only utilizing existing static code checkers but also seeking custom checkers to satisfy their specific needs. Nowadays, various code-checking frameworks provide extensive checker customization interfaces to meet this need. However, both the abstract checking logic as well as the complex API usage of large-scale frameworks make this task challenging. To this end, automated code checker generation is anticipated to ease the burden of checker development. In this paper, we explore the feasibility of automated checker generation and propose AutoChecker, an innovative LLM-powered approach that can write code checkers automatically based on only a rule description and a test suite. Instead of generating the checker at once, AutoChecker incrementally updates the checker with the rule and one single test case each time, i.e., it iteratively generates the checker case by case. During each iteration, AutoChecker first decomposes the whole logic into a series of sub-operations and then uses the logic-guided API-context retrieval strategy to search related API-contexts from all the framework APIs. To evaluate the effectiveness of AutoChecker, we apply AutoChecker and two LLM-based baseline approaches to automatically generate checkers for 20 built-in PMD rules, including easy rules and hard rules. Experimental results demonstrate that AutoChecker significantly outperforms baseline approaches across all effectiveness metrics, where its average test pass rate improved over 4.2 times. Moreover, the checkers generated by AutoChecker are successfully applied to real-world projects, matching the performance of official checkers. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06796v1-abstract-full').style.display = 'none'; document.getElementById('2411.06796v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06764">arXiv:2411.06764</a> <span> [<a href="https://arxiv.org/pdf/2411.06764">pdf</a>, <a href="https://arxiv.org/format/2411.06764">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Multi-Stage Knowledge Integration of Vision-Language Models for Continual Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Hongsheng Zhang</a>, <a href="/search/cs?searchtype=author&query=Ji%2C+Z">Zhong Ji</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jingren Liu</a>, <a href="/search/cs?searchtype=author&query=Pang%2C+Y">Yanwei Pang</a>, <a href="/search/cs?searchtype=author&query=Han%2C+J">Jungong Han</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06764v1-abstract-short" style="display: inline;"> Vision Language Models (VLMs), pre-trained on large-scale image-text datasets, enable zero-shot predictions for unseen data but may underperform on specific unseen tasks. Continual learning (CL) can help VLMs effectively adapt to new data distributions without joint training, but faces challenges of catastrophic forgetting and generalization forgetting. Although significant progress has been achie… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06764v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06764v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06764v1-abstract-full" style="display: none;"> Vision Language Models (VLMs), pre-trained on large-scale image-text datasets, enable zero-shot predictions for unseen data but may underperform on specific unseen tasks. Continual learning (CL) can help VLMs effectively adapt to new data distributions without joint training, but faces challenges of catastrophic forgetting and generalization forgetting. Although significant progress has been achieved by distillation-based methods, they exhibit two severe limitations. One is the popularly adopted single-teacher paradigm fails to impart comprehensive knowledge, The other is the existing methods inadequately leverage the multimodal information in the original training dataset, instead they rely on additional data for distillation, which increases computational and storage overhead. To mitigate both limitations, by drawing on Knowledge Integration Theory (KIT), we propose a Multi-Stage Knowledge Integration network (MulKI) to emulate the human learning process in distillation methods. MulKI achieves this through four stages, including Eliciting Ideas, Adding New Ideas, Distinguishing Ideas, and Making Connections. During the four stages, we first leverage prototypes to align across modalities, eliciting cross-modal knowledge, then adding new knowledge by constructing fine-grained intra- and inter-modality relationships with prototypes. After that, knowledge from two teacher models is adaptively distinguished and re-weighted. Finally, we connect between models from intra- and inter-task, integrating preceding and new knowledge. Our method demonstrates significant improvements in maintaining zero-shot capabilities while supporting continual learning across diverse downstream tasks, showcasing its potential in adapting VLMs to evolving data distributions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06764v1-abstract-full').style.display = 'none'; document.getElementById('2411.06764v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06525">arXiv:2411.06525</a> <span> [<a href="https://arxiv.org/pdf/2411.06525">pdf</a>, <a href="https://arxiv.org/format/2411.06525">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> I2VControl-Camera: Precise Video Camera Control with Adjustable Motion Strength </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Feng%2C+W">Wanquan Feng</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jiawei Liu</a>, <a href="/search/cs?searchtype=author&query=Tu%2C+P">Pengqi Tu</a>, <a href="/search/cs?searchtype=author&query=Qi%2C+T">Tianhao Qi</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+M">Mingzhen Sun</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+T">Tianxiang Ma</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+S">Songtao Zhao</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+S">Siyu Zhou</a>, <a href="/search/cs?searchtype=author&query=He%2C+Q">Qian He</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06525v1-abstract-short" style="display: inline;"> Video generation technologies are developing rapidly and have broad potential applications. Among these technologies, camera control is crucial for generating professional-quality videos that accurately meet user expectations. However, existing camera control methods still suffer from several limitations, including control precision and the neglect of the control for subject motion dynamics. In th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06525v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06525v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06525v1-abstract-full" style="display: none;"> Video generation technologies are developing rapidly and have broad potential applications. Among these technologies, camera control is crucial for generating professional-quality videos that accurately meet user expectations. However, existing camera control methods still suffer from several limitations, including control precision and the neglect of the control for subject motion dynamics. In this work, we propose I2VControl-Camera, a novel camera control method that significantly enhances controllability while providing adjustability over the strength of subject motion. To improve control precision, we employ point trajectory in the camera coordinate system instead of only extrinsic matrix information as our control signal. To accurately control and adjust the strength of subject motion, we explicitly model the higher-order components of the video trajectory expansion, not merely the linear terms, and design an operator that effectively represents the motion strength. We use an adapter architecture that is independent of the base model structure. Experiments on static and dynamic scenes show that our framework outperformances previous methods both quantitatively and qualitatively. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06525v1-abstract-full').style.display = 'none'; document.getElementById('2411.06525v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06272">arXiv:2411.06272</a> <span> [<a href="https://arxiv.org/pdf/2411.06272">pdf</a>, <a href="https://arxiv.org/format/2411.06272">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> </div> </div> <p class="title is-5 mathjax"> Golden Touchstone: A Comprehensive Bilingual Benchmark for Evaluating Financial Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wu%2C+X">Xiaojun Wu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Junxi Liu</a>, <a href="/search/cs?searchtype=author&query=Su%2C+H">Huanyi Su</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+Z">Zhouchi Lin</a>, <a href="/search/cs?searchtype=author&query=Qi%2C+Y">Yiyan Qi</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+C">Chengjin Xu</a>, <a href="/search/cs?searchtype=author&query=Su%2C+J">Jiajun Su</a>, <a href="/search/cs?searchtype=author&query=Zhong%2C+J">Jiajie Zhong</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+F">Fuwei Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Saizhuo Wang</a>, <a href="/search/cs?searchtype=author&query=Hua%2C+F">Fengrui Hua</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jia Li</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+J">Jian Guo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06272v1-abstract-short" style="display: inline;"> As large language models become increasingly prevalent in the financial sector, there is a pressing need for a standardized method to comprehensively assess their performance. However, existing finance benchmarks often suffer from limited language and task coverage, as well as challenges such as low-quality datasets and inadequate adaptability for LLM evaluation. To address these limitations, we p… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06272v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06272v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06272v1-abstract-full" style="display: none;"> As large language models become increasingly prevalent in the financial sector, there is a pressing need for a standardized method to comprehensively assess their performance. However, existing finance benchmarks often suffer from limited language and task coverage, as well as challenges such as low-quality datasets and inadequate adaptability for LLM evaluation. To address these limitations, we propose "Golden Touchstone", the first comprehensive bilingual benchmark for financial LLMs, which incorporates representative datasets from both Chinese and English across eight core financial NLP tasks. Developed from extensive open source data collection and industry-specific demands, this benchmark includes a variety of financial tasks aimed at thoroughly assessing models' language understanding and generation capabilities. Through comparative analysis of major models on the benchmark, such as GPT-4o Llama3, FinGPT and FinMA, we reveal their strengths and limitations in processing complex financial information. Additionally, we open-sourced Touchstone-GPT, a financial LLM trained through continual pre-training and financial instruction tuning, which demonstrates strong performance on the bilingual benchmark but still has limitations in specific tasks.This research not only provides the financial large language models with a practical evaluation tool but also guides the development and optimization of future research. The source code for Golden Touchstone and model weight of Touchstone-GPT have been made publicly available at \url{https://github.com/IDEA-FinAI/Golden-Touchstone}, contributing to the ongoing evolution of FinLLMs and fostering further research in this critical area. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06272v1-abstract-full').style.display = 'none'; document.getElementById('2411.06272v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">26 pages, 9 tables, 3 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06229">arXiv:2411.06229</a> <span> [<a href="https://arxiv.org/pdf/2411.06229">pdf</a>, <a href="https://arxiv.org/format/2411.06229">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Multimodal Contrastive Learning of Urban Space Representations from POI Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xinglei Wang</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+T">Tao Cheng</a>, <a href="/search/cs?searchtype=author&query=Law%2C+S">Stephen Law</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+Z">Zichao Zeng</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+L">Lu Yin</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Junyuan Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06229v1-abstract-short" style="display: inline;"> Existing methods for learning urban space representations from Point-of-Interest (POI) data face several limitations, including issues with geographical delineation, inadequate spatial information modelling, underutilisation of POI semantic attributes, and computational inefficiencies. To address these issues, we propose CaLLiPer (Contrastive Language-Location Pre-training), a novel representation… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06229v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06229v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06229v1-abstract-full" style="display: none;"> Existing methods for learning urban space representations from Point-of-Interest (POI) data face several limitations, including issues with geographical delineation, inadequate spatial information modelling, underutilisation of POI semantic attributes, and computational inefficiencies. To address these issues, we propose CaLLiPer (Contrastive Language-Location Pre-training), a novel representation learning model that directly embeds continuous urban spaces into vector representations that can capture the spatial and semantic distribution of urban environment. This model leverages a multimodal contrastive learning objective, aligning location embeddings with textual POI descriptions, thereby bypassing the need for complex training corpus construction and negative sampling. We validate CaLLiPer's effectiveness by applying it to learning urban space representations in London, UK, where it demonstrates 5-15% improvement in predictive performance for land use classification and socioeconomic mapping tasks compared to state-of-the-art methods. Visualisations of the learned representations further illustrate our model's advantages in capturing spatial variations in urban semantics with high accuracy and fine resolution. Additionally, CaLLiPer achieves reduced training time, showcasing its efficiency and scalability. This work provides a promising pathway for scalable, semantically rich urban space representation learning that can support the development of geospatial foundation models. The implementation code is available at https://github.com/xlwang233/CaLLiPer. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06229v1-abstract-full').style.display = 'none'; document.getElementById('2411.06229v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">19 pages, 5 figures, 7 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06065">arXiv:2411.06065</a> <span> [<a href="https://arxiv.org/pdf/2411.06065">pdf</a>, <a href="https://arxiv.org/format/2411.06065">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> </div> </div> <p class="title is-5 mathjax"> DFT: A Dual-branch Framework of Fluctuation and Trend for Stock Price Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dong%2C+C">Chengqi Dong</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+Z">Zhiyuan Cao</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+S+K">S Kevin Zhou</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jia Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06065v1-abstract-short" style="display: inline;"> Stock price prediction is of significant importance in quantitative investment. Existing approaches encounter two primary issues: First, they often overlook the crucial role of capturing short-term stock fluctuations for predicting high-volatility returns. Second, mainstream methods, relying on graphs or attention mechanisms, inadequately explore the temporal relationships among stocks, often blur… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06065v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06065v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06065v1-abstract-full" style="display: none;"> Stock price prediction is of significant importance in quantitative investment. Existing approaches encounter two primary issues: First, they often overlook the crucial role of capturing short-term stock fluctuations for predicting high-volatility returns. Second, mainstream methods, relying on graphs or attention mechanisms, inadequately explore the temporal relationships among stocks, often blurring distinctions in their characteristics over time and the causal relationships before and after. However, the high volatility of stocks and the intricate market correlations are crucial to accurately predicting stock prices. To address these challenges, we propose a Dual-branch Framework of Fluctuation and Trend (DFT), which decomposes stocks into trend and fluctuation components. By employing a carefully design decomposition module, DFT effectively extracts short-term fluctuations and trend information from stocks while explicitly modeling temporal variations and causal correlations. Our extensive experiments demonstrate that DFT outperforms existing methods across multiple metrics, including a 300% improvement in ranking metrics and a 400% improvement in portfolio-based indicators. Through detailed experiments, we provide valuable insights into different roles of trends and fluctuations in stock price prediction. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06065v1-abstract-full').style.display = 'none'; document.getElementById('2411.06065v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.05898">arXiv:2411.05898</a> <span> [<a href="https://arxiv.org/pdf/2411.05898">pdf</a>, <a href="https://arxiv.org/format/2411.05898">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Integrating Object Detection Modality into Visual Language Model for Enhanced Autonomous Driving Agent </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=He%2C+L">Linfeng He</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Y">Yiming Sun</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+S">Sihao Wu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jiaxu Liu</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+X">Xiaowei Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.05898v1-abstract-short" style="display: inline;"> In this paper, we propose a novel framework for enhancing visual comprehension in autonomous driving systems by integrating visual language models (VLMs) with additional visual perception module specialised in object detection. We extend the Llama-Adapter architecture by incorporating a YOLOS-based detection network alongside the CLIP perception network, addressing limitations in object detection… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05898v1-abstract-full').style.display = 'inline'; document.getElementById('2411.05898v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.05898v1-abstract-full" style="display: none;"> In this paper, we propose a novel framework for enhancing visual comprehension in autonomous driving systems by integrating visual language models (VLMs) with additional visual perception module specialised in object detection. We extend the Llama-Adapter architecture by incorporating a YOLOS-based detection network alongside the CLIP perception network, addressing limitations in object detection and localisation. Our approach introduces camera ID-separators to improve multi-view processing, crucial for comprehensive environmental awareness. Experiments on the DriveLM visual question answering challenge demonstrate significant improvements over baseline models, with enhanced performance in ChatGPT scores, BLEU scores, and CIDEr metrics, indicating closeness of model answer to ground truth. Our method represents a promising step towards more capable and interpretable autonomous driving systems. Possible safety enhancement enabled by detection modality is also discussed. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05898v1-abstract-full').style.display = 'none'; document.getElementById('2411.05898v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">accepted by SafeGenAI workshop of NeurIPS 2024</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Liu%2C+J&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Liu%2C+J&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Liu%2C+J&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Liu%2C+J&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Liu%2C+J&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Liu%2C+J&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>