Search | arXiv e-print repository
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 168 results for author: <span class="mathjax">Ding, K</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Ding%2C+K">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Ding, K"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Ding%2C+K&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Ding, K"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Ding%2C+K&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Ding%2C+K&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Ding%2C+K&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Ding%2C+K&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Ding%2C+K&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.16065">arXiv:2502.16065</a> <span> [<a href="https://arxiv.org/pdf/2502.16065">pdf</a>, <a href="https://arxiv.org/format/2502.16065">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> A Survey of Model Extraction Attacks and Defenses in Distributed Computing Environments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhao%2C+K">Kaixiang Zhao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+L">Lincan Li</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+K">Kaize Ding</a>, <a href="/search/cs?searchtype=author&query=Gong%2C+N+Z">Neil Zhenqiang Gong</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Y">Yue Zhao</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+Y">Yushun Dong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.16065v1-abstract-short" style="display: inline;"> Model Extraction Attacks (MEAs) threaten modern machine learning systems by enabling adversaries to steal models, exposing intellectual property and training data. With the increasing deployment of machine learning models in distributed computing environments, including cloud, edge, and federated learning settings, each paradigm introduces distinct vulnerabilities and challenges. 
Without a unified… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.16065v1-abstract-full').style.display = 'inline'; document.getElementById('2502.16065v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.16065v1-abstract-full" style="display: none;"> Model Extraction Attacks (MEAs) threaten modern machine learning systems by enabling adversaries to steal models, exposing intellectual property and training data. With the increasing deployment of machine learning models in distributed computing environments, including cloud, edge, and federated learning settings, each paradigm introduces distinct vulnerabilities and challenges. Without a unified perspective on MEAs across these distributed environments, organizations risk fragmented defenses, inadequate risk assessments, and substantial economic and privacy losses. This survey is motivated by the urgent need to understand how the unique characteristics of cloud, edge, and federated deployments shape attack vectors and defense requirements. We systematically examine the evolution of attack methodologies and defense mechanisms across these environments, demonstrating how environmental factors influence security strategies in critical sectors such as autonomous vehicles, healthcare, and financial services. By synthesizing recent advances in MEAs research and discussing the limitations of current evaluation practices, this survey provides essential insights for developing robust and adaptive defense strategies. Our comprehensive approach highlights the importance of integrating protective measures across the entire distributed computing landscape to ensure the secure deployment of machine learning models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.16065v1-abstract-full').style.display = 'none'; document.getElementById('2502.16065v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.16055">arXiv:2502.16055</a> <span> [<a href="https://arxiv.org/pdf/2502.16055">pdf</a>, <a href="https://arxiv.org/format/2502.16055">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> MedForge: Building Medical Foundation Models Like Open Source Software Development </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tan%2C+Z">Zheling Tan</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+K">Kexin Ding</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+J">Jin Gao</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+M">Mu Zhou</a>, <a href="/search/cs?searchtype=author&query=Metaxas%2C+D">Dimitris Metaxas</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Shaoting Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+D">Dequan Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.16055v1-abstract-short" style="display: inline;"> Foundational models (FMs) have made significant strides in the healthcare domain. Yet the data silo challenge and privacy concern remain in healthcare systems, hindering safe medical data sharing and collaborative model development among institutions. The collection and curation of scalable clinical datasets increasingly become the bottleneck for training strong FMs. In this study, we propose Medi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.16055v1-abstract-full').style.display = 'inline'; document.getElementById('2502.16055v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.16055v1-abstract-full" style="display: none;"> Foundational models (FMs) have made significant strides in the healthcare domain. Yet the data silo challenge and privacy concern remain in healthcare systems, hindering safe medical data sharing and collaborative model development among institutions. The collection and curation of scalable clinical datasets increasingly become the bottleneck for training strong FMs. In this study, we propose Medical Foundation Models Merging (MedForge), a cooperative framework enabling a community-driven medical foundation model development, meanwhile preventing the information leakage of raw patient data and mitigating synchronization model development issues across clinical institutions. MedForge offers a bottom-up model construction mechanism by flexibly merging task-specific Low-Rank Adaptation (LoRA) modules, which can adapt to downstream tasks while retaining original model parameters. Through an asynchronous LoRA module integration scheme, the resulting composite model can progressively enhance its comprehensive performance on various clinical tasks. 
MedForge shows strong performance on multiple clinical datasets (e.g., breast cancer, lung cancer, and colon cancer) collected from different institutions. Our major findings highlight the value of collaborative foundation models in advancing multi-center clinical collaboration effectively and cohesively. Our code is publicly available at https://github.com/TanZheling/MedForge. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.16055v1-abstract-full').style.display = 'none'; document.getElementById('2502.16055v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.14739">arXiv:2502.14739</a> <span> [<a href="https://arxiv.org/pdf/2502.14739">pdf</a>, <a href="https://arxiv.org/format/2502.14739">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> SuperGPQA: Scaling LLM Evaluation across 285 Graduate Disciplines </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Team%2C+M">M-A-P Team</a>, <a href="/search/cs?searchtype=author&query=Du%2C+X">Xinrun Du</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+Y">Yifan Yao</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+K">Kaijing Ma</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+B">Bingli Wang</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+T">Tianyu Zheng</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+K">Kang Zhu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+M">Minghao Liu</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+Y">Yiming Liang</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+X">Xiaolong Jin</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+Z">Zhenlin Wei</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+C">Chujie Zheng</a>, <a href="/search/cs?searchtype=author&query=Deng%2C+K">Kaixin Deng</a>, <a href="/search/cs?searchtype=author&query=Jia%2C+S">Shian Jia</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+S">Sichao Jiang</a>, <a href="/search/cs?searchtype=author&query=Liao%2C+Y">Yiyan Liao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+R">Rui Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Q">Qinrui Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S">Sirun Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yizhi Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yunwen Li</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+D">Dehua Ma</a>, <a href="/search/cs?searchtype=author&query=Ni%2C+Y">Yuansheng Ni</a>, <a href="/search/cs?searchtype=author&query=Que%2C+H">Haoran Que</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Q">Qiyao Wang</a> , et al. 
(71 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.14739v2-abstract-short" style="display: inline;"> Large language models (LLMs) have demonstrated remarkable proficiency in mainstream academic disciplines such as mathematics, physics, and computer science. However, human knowledge encompasses over 200 specialized disciplines, far exceeding the scope of existing benchmarks. The capabilities of LLMs in many of these specialized fields-particularly in light industry, agriculture, and service-orient… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14739v2-abstract-full').style.display = 'inline'; document.getElementById('2502.14739v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.14739v2-abstract-full" style="display: none;"> Large language models (LLMs) have demonstrated remarkable proficiency in mainstream academic disciplines such as mathematics, physics, and computer science. However, human knowledge encompasses over 200 specialized disciplines, far exceeding the scope of existing benchmarks. The capabilities of LLMs in many of these specialized fields-particularly in light industry, agriculture, and service-oriented disciplines-remain inadequately evaluated. To address this gap, we present SuperGPQA, a comprehensive benchmark that evaluates graduate-level knowledge and reasoning capabilities across 285 disciplines. Our benchmark employs a novel Human-LLM collaborative filtering mechanism to eliminate trivial or ambiguous questions through iterative refinement based on both LLM responses and expert feedback. Our experimental results reveal significant room for improvement in the performance of current state-of-the-art LLMs across diverse knowledge domains (e.g., the reasoning-focused model DeepSeek-R1 achieved the highest accuracy of 61.82% on SuperGPQA), highlighting the considerable gap between current model capabilities and artificial general intelligence. Additionally, we present comprehensive insights from our management of a large-scale annotation process, involving over 80 expert annotators and an interactive Human-LLM collaborative system, offering valuable methodological guidance for future research initiatives of comparable scope. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.14739v2-abstract-full').style.display = 'none'; document.getElementById('2502.14739v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 

4. arXiv:2502.02257 [pdf, other]  cs.CV
   UNIP: Rethinking Pre-trained Attention Patterns for Infrared Semantic Segmentation
   Authors: Tao Zhang, Jinyong Wen, Zhen Chen, Kun Ding, Shiming Xiang, Chunhong Pan
   Abstract: Pre-training techniques significantly enhance the performance of semantic segmentation tasks with limited training data. However, the efficacy under a large domain gap between pre-training (e.g. RGB) and fine-tuning (e.g. infrared) remains underexplored. In this study, we first benchmark the infrared semantic segmentation performance of various pre-training methods and reveal several phenomena distinct from the RGB domain. Next, our layerwise analysis of pre-trained attention maps uncovers that: (1) There are three typical attention patterns (local, hybrid, and global); (2) Pre-training tasks notably influence the pattern distribution across layers; (3) The hybrid pattern is crucial for semantic segmentation as it attends to both nearby and foreground elements; (4) The texture bias impedes model generalization in infrared tasks. Building on these insights, we propose UNIP, a UNified Infrared Pre-training framework, to enhance the pre-trained model performance. This framework uses the hybrid-attention distillation NMI-HAD as the pre-training target, a large-scale mixed dataset InfMix for pre-training, and a last-layer feature pyramid network LL-FPN for fine-tuning. Experimental results show that UNIP outperforms various pre-training methods by up to 13.5% in average mIoU on three infrared segmentation tasks, evaluated using fine-tuning and linear probing metrics. UNIP-S achieves performance on par with MAE-L while requiring only 1/10 of the computational cost. Furthermore, UNIP significantly surpasses state-of-the-art (SOTA) infrared or RGB segmentation methods and demonstrates broad potential for application in other modalities, such as RGB and depth. Our code is available at https://github.com/casiatao/UNIP.
   Submitted 4 February, 2025; originally announced February 2025.
   Comments: ICLR 2025. 27 pages, 13 figures, 21 tables

5. arXiv:2502.01051 [pdf, other]  cs.CV
   Diffusion Model as a Noise-Aware Latent Reward Model for Step-Level Preference Optimization
   Authors: Tao Zhang, Cheng Da, Kun Ding, Kun Jin, Yan Li, Tingting Gao, Di Zhang, Shiming Xiang, Chunhong Pan
   Abstract: Preference optimization for diffusion models aims to align them with human preferences for images. Previous methods typically leverage Vision-Language Models (VLMs) as pixel-level reward models to approximate human preferences. However, when used for step-level preference optimization, these models face challenges in handling noisy images of different timesteps and require complex transformations into pixel space. In this work, we demonstrate that diffusion models are inherently well-suited for step-level reward modeling in the latent space, as they can naturally extract features from noisy latent images. Accordingly, we propose the Latent Reward Model (LRM), which repurposes components of diffusion models to predict preferences of latent images at various timesteps. Building on LRM, we introduce Latent Preference Optimization (LPO), a method designed for step-level preference optimization directly in the latent space. Experimental results indicate that LPO not only significantly enhances performance in aligning diffusion models with general, aesthetic, and text-image alignment preferences, but also achieves 2.5-28× training speedup compared to existing preference optimization methods. Our code will be available at https://github.com/casiatao/LPO.
   Submitted 2 February, 2025; originally announced February 2025.
   Comments: 20 pages, 14 tables, 15 figures

6. arXiv:2502.00350 [pdf, other]  cs.SE, cs.AI
   OrcaLoca: An LLM Agent Framework for Software Issue Localization
   Authors: Zhongming Yu, Hejia Zhang, Yujie Zhao, Hanxian Huang, Matrix Yao, Ke Ding, Jishen Zhao
   Abstract: Recent developments in Large Language Model (LLM) agents are revolutionizing Autonomous Software Engineering (ASE), enabling automated coding, problem fixes, and feature improvements. However, localization -- precisely identifying software problems by navigating to relevant code sections -- remains a significant challenge. Current approaches often yield suboptimal results due to a lack of effective integration between LLM agents and precise code search mechanisms. This paper introduces OrcaLoca, an LLM agent framework that improves accuracy for software issue localization by integrating priority-based scheduling for LLM-guided action, action decomposition with relevance scoring, and distance-aware context pruning. Experimental results demonstrate that OrcaLoca becomes the new open-source state-of-the-art (SOTA) in function match rate (65.33%) on SWE-bench Lite. It also improves the final resolved rate of an open-source framework by 6.33 percentage points through its patch generation integration.
   Submitted 1 February, 2025; originally announced February 2025.

7. arXiv:2501.19036 [pdf, other]  cs.CV
   RedundancyLens: Revealing and Exploiting Visual Token Processing Redundancy for Efficient Decoder-Only MLLMs
   Authors: Hongliang Li, Jiaxin Zhang, Wenhui Liao, Dezhi Peng, Kai Ding, Lianwen Jin
   Abstract: Current Multimodal Large Language Model (MLLM) architectures face a critical tradeoff between performance and efficiency: decoder-only architectures achieve higher performance but lower efficiency, while cross-attention-based architectures offer greater efficiency but lower performance. The key distinction lies in how visual tokens are processed. Decoder-only architectures apply self-attention and FFN operations on visual tokens, while cross-attention architectures skip these computations. To investigate whether redundancy exists in this computationally expensive process, we propose a training-free framework for analyzing trained MLLMs. It consists of Probe-Activated Dynamic FFN and Hollow Attention, which enable adjustable reductions in computations for visual tokens, as well as a Layer Ranking Algorithm that prioritizes layers for these reductions. Extensive experiments demonstrate substantial, structured, and clustered redundancy unique to decoder-only MLLMs, offering valuable insights for future MLLM architecture design. Furthermore, by leveraging our reduction framework as a training-free inference acceleration approach, we achieve performance comparable to or better than state-of-the-art methods while remaining compatible with them. Code will be publicly available at https://github.com/L-Hugh/RedundancyLens.
   Submitted 18 February, 2025; v1 submitted 31 January, 2025; originally announced January 2025.

8. arXiv:2501.17642 [pdf, other]  cs.CV
   Efficient Redundancy Reduction for Open-Vocabulary Semantic Segmentation
   Authors: Lin Chen, Qi Yang, Kun Ding, Zhihao Li, Gang Shen, Fei Li, Qiyuan Cao, Shiming Xiang
   Abstract: Open-vocabulary semantic segmentation (OVSS) is an open-world task that aims to assign each pixel within an image to a specific class defined by arbitrary text descriptions. Recent advancements in large-scale vision-language models have demonstrated their open-vocabulary understanding capabilities, significantly facilitating the development of OVSS. However, most existing methods suffer from either suboptimal performance or long latency. This study introduces ERR-Seg, a novel framework that effectively reduces redundancy to balance accuracy and efficiency. ERR-Seg incorporates a training-free Channel Reduction Module (CRM) that leverages prior knowledge from vision-language models like CLIP to identify the most relevant classes while discarding others. Moreover, it incorporates Efficient Semantic Context Fusion (ESCF) with spatial-level and class-level sequence reduction strategies. CRM and ESCF result in substantial memory and computational savings without compromising accuracy. Additionally, recognizing the significance of hierarchical semantics extracted from middle-layer features for closed-set semantic segmentation, ERR-Seg introduces the Hierarchical Semantic Module (HSM) to exploit hierarchical semantics in the context of OVSS. Compared to previous state-of-the-art methods under the ADE20K-847 setting, ERR-Seg achieves +5.6% mIoU improvement and reduces latency by 67.3%.
   Submitted 29 January, 2025; originally announced January 2025.

9. arXiv:2501.03410 [pdf, other]  cs.CV
   ScaleMAI: Accelerating the Development of Trusted Datasets and AI Models
   Authors: Wenxuan Li, Pedro R. A. S. Bassi, Tianyu Lin, Yu-Cheng Chou, Xinze Zhou, Yucheng Tang, Fabian Isensee, Kang Wang, Qi Chen, Xiaowei Xu, Xiaoxi Chen, Lizhou Wu, Qilong Wu, Yannick Kirchhoff, Maximilian Rokuss, Saikat Roy, Yuxuan Zhao, Dexin Yu, Kai Ding, Constantin Ulrich, Klaus Maier-Hein, Yang Yang, Alan L. Yuille, Zongwei Zhou
   Abstract: Building trusted datasets is critical for transparent and responsible Medical AI (MAI) research, but creating even small, high-quality datasets can take years of effort from multidisciplinary teams. This process often delays AI benefits, as human-centric data creation and AI-centric model development are treated as separate, sequential steps. To overcome this, we propose ScaleMAI, an agent of AI-integrated data curation and annotation, allowing data quality and AI performance to improve in a self-reinforcing cycle and reducing development time from years to months. We adopt pancreatic tumor detection as an example. First, ScaleMAI progressively creates a dataset of 25,362 CT scans, including per-voxel annotations for benign/malignant tumors and 24 anatomical structures. Second, through progressive human-in-the-loop iterations, ScaleMAI provides Flagship AI Model that can approach the proficiency of expert annotators (30-year experience) in detecting pancreatic tumors. Flagship Model significantly outperforms models developed from smaller, fixed-quality datasets, with substantial gains in tumor detection (+14%), segmentation (+5%), and classification (72%) on three prestigious benchmarks. In summary, ScaleMAI transforms the speed, scale, and reliability of medical dataset creation, paving the way for a variety of impactful, data-driven applications.
   Submitted 6 January, 2025; originally announced January 2025.
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.14922">arXiv:2412.14922</a> <span> [<a href="https://arxiv.org/pdf/2412.14922">pdf</a>, <a href="https://arxiv.org/format/2412.14922">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> RobustFT: Robust Supervised Fine-tuning for Large Language Models under Noisy Response </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Luo%2C+J">Junyu Luo</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+X">Xiao Luo</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+K">Kaize Ding</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+J">Jingyang Yuan</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+Z">Zhiping Xiao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+M">Ming Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.14922v1-abstract-short" style="display: inline;"> Supervised fine-tuning (SFT) plays a crucial role in adapting large language models (LLMs) to specific domains or tasks. However, as demonstrated by empirical experiments, the collected data inevitably contains noise in practical applications, which poses significant challenges to model performance on downstream tasks. Therefore, there is an urgent need for a noise-robust SFT framework to enhance… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.14922v1-abstract-full').style.display = 'inline'; document.getElementById('2412.14922v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.14922v1-abstract-full" style="display: none;"> Supervised fine-tuning (SFT) plays a crucial role in adapting large language models (LLMs) to specific domains or tasks. However, as demonstrated by empirical experiments, the collected data inevitably contains noise in practical applications, which poses significant challenges to model performance on downstream tasks. Therefore, there is an urgent need for a noise-robust SFT framework to enhance model capabilities in downstream tasks. To address this challenge, we introduce a robust SFT framework (RobustFT) that performs noise detection and relabeling on downstream task data. For noise identification, our approach employs a multi-expert collaborative system with inference-enhanced models to achieve superior noise detection. In the denoising phase, we utilize a context-enhanced strategy, which incorporates the most relevant and confident knowledge followed by careful assessment to generate reliable annotations. Additionally, we introduce an effective data selection mechanism based on response entropy, ensuring only high-quality samples are retained for fine-tuning. Extensive experiments conducted on multiple LLMs across five datasets demonstrate RobustFT's exceptional performance in noisy scenarios. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.14922v1-abstract-full').style.display = 'none'; document.getElementById('2412.14922v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.12150">arXiv:2412.12150</a> <span> [<a href="https://arxiv.org/pdf/2412.12150">pdf</a>, <a href="https://arxiv.org/format/2412.12150">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Rethinking Comprehensive Benchmark for Chart Understanding: A Perspective from Scientific Literature </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shen%2C+L">Lingdong Shen</a>, <a href="/search/cs?searchtype=author&query=Qigqi"> Qigqi</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+K">Kun Ding</a>, <a href="/search/cs?searchtype=author&query=Meng%2C+G">Gaofeng Meng</a>, <a href="/search/cs?searchtype=author&query=Xiang%2C+S">Shiming Xiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.12150v1-abstract-short" style="display: inline;"> Scientific Literature charts often contain complex visual elements, including multi-plot figures, flowcharts, structural diagrams and etc. Evaluating multimodal models using these authentic and intricate charts provides a more accurate assessment of their understanding abilities. However, existing benchmarks face limitations: a narrow range of chart types, overly simplistic template-based question… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.12150v1-abstract-full').style.display = 'inline'; document.getElementById('2412.12150v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.12150v1-abstract-full" style="display: none;"> Scientific Literature charts often contain complex visual elements, including multi-plot figures, flowcharts, structural diagrams and etc. Evaluating multimodal models using these authentic and intricate charts provides a more accurate assessment of their understanding abilities. However, existing benchmarks face limitations: a narrow range of chart types, overly simplistic template-based questions and visual elements, and inadequate evaluation methods. These shortcomings lead to inflated performance scores that fail to hold up when models encounter real-world scientific charts. To address these challenges, we introduce a new benchmark, Scientific Chart QA (SCI-CQA), which emphasizes flowcharts as a critical yet often overlooked category. 
arXiv:2412.11142 [cs.CL, cs.AI] - abs: https://arxiv.org/abs/2412.11142 - pdf: https://arxiv.org/pdf/2412.11142
AD-LLM: Benchmarking Large Language Models for Anomaly Detection
Authors: Tiankai Yang, Yi Nian, Shawn Li, Ruiyao Xu, Yuangang Li, Jiaqi Li, Zhuo Xiao, Xiyang Hu, Ryan Rossi, Kaize Ding, Xia Hu, Yue Zhao
Abstract: Anomaly detection (AD) is an important machine learning task with many real-world uses, including fraud detection, medical diagnosis, and industrial monitoring. Within natural language processing (NLP), AD helps detect issues like spam, misinformation, and unusual user activity. Although large language models (LLMs) have had a strong impact on tasks such as text generation and summarization, their potential in AD has not been studied enough. This paper introduces AD-LLM, the first benchmark that evaluates how LLMs can help with NLP anomaly detection. We examine three key tasks: (i) zero-shot detection, using LLMs' pre-trained knowledge to perform AD without task-specific training; (ii) data augmentation, generating synthetic data and category descriptions to improve AD models; and (iii) model selection, using LLMs to suggest unsupervised AD models. Through experiments with different datasets, we find that LLMs can work well in zero-shot AD, that carefully designed augmentation methods are useful, and that explaining model selection for specific datasets remains challenging. Based on these results, we outline six future research directions on LLMs for AD.
Submitted 15 December, 2024; originally announced December 2024.
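To make the zero-shot setting concrete, below is a minimal sketch of prompting an LLM for an anomaly score; the prompt wording, the `llm` callable, and the threshold are illustrative assumptions rather than the benchmark's actual protocol.

```python
# `llm` is any callable mapping a prompt string to a completion string.
PROMPT = (
    "You are an anomaly detector for {domain} text.\n"
    "Text: {text}\n"
    "On a scale from 0 (normal) to 1 (anomalous), answer with a single number."
)

def zero_shot_anomaly_score(llm, text, domain="customer messages"):
    reply = llm(PROMPT.format(domain=domain, text=text))
    try:
        score = float(reply.strip().split()[0])
    except ValueError:
        score = 0.5  # fall back to "uncertain" if the reply is not numeric
    return min(max(score, 0.0), 1.0)

def detect(llm, texts, threshold=0.5):
    return [t for t in texts if zero_shot_anomaly_score(llm, t) >= threshold]

# Usage with a stubbed-out model (a real LLM would differentiate the inputs).
fake_llm = lambda prompt: "0.9"
print(detect(fake_llm, ["free money click now", "thanks for the update"]))
```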
arXiv:2412.10663 [cs.LG, cs.CV, math.OC] - abs: https://arxiv.org/abs/2412.10663 - pdf: https://arxiv.org/pdf/2412.10663
Memory-Efficient 4-bit Preconditioned Stochastic Optimization
Authors: Jingyang Li, Kuangyu Ding, Kim-Chuan Toh, Pan Zhou
Abstract: Preconditioned stochastic optimization algorithms, exemplified by Shampoo, have demonstrated superior performance over first-order optimizers, providing both theoretical advantages in convergence rates and practical improvements in large-scale neural network training. However, they incur substantial memory overhead due to the storage demands of non-diagonal preconditioning matrices. To address this, we introduce 4-bit quantization for Shampoo's preconditioners. We introduce two key methods. First, we apply Cholesky decomposition followed by quantization of the Cholesky factors, reducing memory usage by leveraging their lower triangular structure while preserving symmetry and positive definiteness to minimize information loss. To our knowledge, this is the first quantization approach applied to Cholesky factors of preconditioners. Second, we incorporate error feedback in the quantization process, efficiently storing Cholesky factors and error states in the lower and upper triangular parts of the same matrix. Through extensive experiments, we demonstrate that combining Cholesky quantization with error feedback enhances memory efficiency and algorithm performance in large-scale deep-learning tasks. Theoretically, we also provide convergence proofs for quantized Shampoo under both smooth and non-smooth stochastic optimization settings.
Submitted 13 December, 2024; originally announced December 2024.
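A rough numpy sketch of the two ideas named above: quantize the lower-triangular Cholesky factor of an SPD preconditioner block to 4 bits (absmax scaling) and carry the quantization error forward as error feedback. The packing scheme and update rule are illustrative guesses, not the paper's scheme.

```python
import numpy as np

def quantize_4bit(x, scale):
    # 4-bit signed grid: integers in [-7, 7] scaled by `scale`.
    q = np.clip(np.round(x / scale), -7, 7)
    return q, q * scale

def quantize_cholesky_with_feedback(precond, error_state):
    L = np.linalg.cholesky(precond)   # lower-triangular factor of the SPD block
    target = L + error_state          # fold in the previous quantization error
    scale = np.abs(target).max() / 7.0 + 1e-12
    q_int, deq = quantize_4bit(target, scale)
    new_error = target - deq          # error feedback carried to the next step
    return q_int.astype(np.int8), scale, deq, new_error

# Usage: a small SPD matrix stands in for one Shampoo preconditioner block.
rng = np.random.default_rng(0)
A = rng.normal(size=(4, 4))
P = A @ A.T + 4 * np.eye(4)
err = np.zeros_like(P)
q, s, L_hat, err = quantize_cholesky_with_feedback(P, err)
print("reconstruction error:", np.linalg.norm(L_hat @ L_hat.T - P))
```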
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.10663v1-abstract-full').style.display = 'none'; document.getElementById('2412.10663v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.06864">arXiv:2412.06864</a> <span> [<a href="https://arxiv.org/pdf/2412.06864">pdf</a>, <a href="https://arxiv.org/format/2412.06864">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Political-LLM: Large Language Models in Political Science </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+L">Lincan Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jiaqi Li</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+C">Catherine Chen</a>, <a href="/search/cs?searchtype=author&query=Gui%2C+F">Fred Gui</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+H">Hongjia Yang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+C">Chenxiao Yu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zhengguang Wang</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+J">Jianing Cai</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+J+A">Junlong Aaron Zhou</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+B">Bolin Shen</a>, <a href="/search/cs?searchtype=author&query=Qian%2C+A">Alex Qian</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+W">Weixin Chen</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+Z">Zhongkai Xue</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+L">Lichao Sun</a>, <a href="/search/cs?searchtype=author&query=He%2C+L">Lifang He</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+H">Hanjie Chen</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+K">Kaize Ding</a>, <a href="/search/cs?searchtype=author&query=Du%2C+Z">Zijian Du</a>, <a href="/search/cs?searchtype=author&query=Mu%2C+F">Fangzhou Mu</a>, <a href="/search/cs?searchtype=author&query=Pei%2C+J">Jiaxin Pei</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+J">Jieyu Zhao</a>, <a href="/search/cs?searchtype=author&query=Swayamdipta%2C+S">Swabha Swayamdipta</a>, <a href="/search/cs?searchtype=author&query=Neiswanger%2C+W">Willie Neiswanger</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+H">Hua Wei</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+X">Xiyang Hu</a> , et al. (22 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.06864v1-abstract-short" style="display: inline;"> In recent years, large language models (LLMs) have been widely adopted in political science tasks such as election prediction, sentiment analysis, policy impact assessment, and misinformation detection. 
Meanwhile, the need to systematically understand how LLMs can further revolutionize the field also becomes urgent. In this work, we--a multidisciplinary team of researchers spanning computer scienc… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.06864v1-abstract-full').style.display = 'inline'; document.getElementById('2412.06864v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.06864v1-abstract-full" style="display: none;"> In recent years, large language models (LLMs) have been widely adopted in political science tasks such as election prediction, sentiment analysis, policy impact assessment, and misinformation detection. Meanwhile, the need to systematically understand how LLMs can further revolutionize the field also becomes urgent. In this work, we--a multidisciplinary team of researchers spanning computer science and political science--present the first principled framework termed Political-LLM to advance the comprehensive understanding of integrating LLMs into computational political science. Specifically, we first introduce a fundamental taxonomy classifying the existing explorations into two perspectives: political science and computational methodologies. In particular, from the political science perspective, we highlight the role of LLMs in automating predictive and generative tasks, simulating behavior dynamics, and improving causal inference through tools like counterfactual generation; from a computational perspective, we introduce advancements in data preparation, fine-tuning, and evaluation methods for LLMs that are tailored to political contexts. We identify key challenges and future directions, emphasizing the development of domain-specific datasets, addressing issues of bias and fairness, incorporating human expertise, and redefining evaluation criteria to align with the unique requirements of computational political science. Political-LLM seeks to serve as a guidebook for researchers to foster an informed, ethical, and impactful use of Artificial Intelligence in political science. Our online resource is available at: http://political-llm.org/. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.06864v1-abstract-full').style.display = 'none'; document.getElementById('2412.06864v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">54 Pages, 9 Figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.15731">arXiv:2411.15731</a> <span> [<a href="https://arxiv.org/pdf/2411.15731">pdf</a>, <a href="https://arxiv.org/format/2411.15731">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Fusion Matters: Learning Fusion in Deep Click-through Rate Prediction Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+K">Kexin Zhang</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+F">Fuyuan Lyu</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+X">Xing Tang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+D">Dugang Liu</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+C">Chen Ma</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+K">Kaize Ding</a>, <a href="/search/cs?searchtype=author&query=He%2C+X">Xiuqiang He</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xue Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.15731v1-abstract-short" style="display: inline;"> The evolution of previous Click-Through Rate (CTR) models has mainly been driven by proposing complex components, whether shallow or deep, that are adept at modeling feature interactions. However, there has been less focus on improving fusion design. Instead, two naive solutions, stacked and parallel fusion, are commonly used. Both solutions rely on pre-determined fusion connections and fixed fusi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.15731v1-abstract-full').style.display = 'inline'; document.getElementById('2411.15731v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.15731v1-abstract-full" style="display: none;"> The evolution of previous Click-Through Rate (CTR) models has mainly been driven by proposing complex components, whether shallow or deep, that are adept at modeling feature interactions. However, there has been less focus on improving fusion design. Instead, two naive solutions, stacked and parallel fusion, are commonly used. Both solutions rely on pre-determined fusion connections and fixed fusion operations. It has been repetitively observed that changes in fusion design may result in different performances, highlighting the critical role that fusion plays in CTR models. While there have been attempts to refine these basic fusion strategies, these efforts have often been constrained to specific settings or dependent on specific components. Neural architecture search has also been introduced to partially deal with fusion design, but it comes with limitations. The complexity of the search space can lead to inefficient and ineffective results. 
To bridge this gap, we introduce OptFusion, a method that automates the learning of fusion, encompassing both the connection learning and the operation selection. We have proposed a one-shot learning algorithm tackling these tasks concurrently. Our experiments are conducted over three large-scale datasets. Extensive experiments prove both the effectiveness and efficiency of OptFusion in improving CTR model performance. Our code implementation is available here\url{https://github.com/kexin-kxzhang/OptFusion}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.15731v1-abstract-full').style.display = 'none'; document.getElementById('2411.15731v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by WSDM 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.11925">arXiv:2411.11925</a> <span> [<a href="https://arxiv.org/pdf/2411.11925">pdf</a>, <a href="https://arxiv.org/format/2411.11925">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Continuous Speculative Decoding for Autoregressive Image Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zili Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Robert Zhang</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+K">Kun Ding</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Q">Qi Yang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+F">Fei Li</a>, <a href="/search/cs?searchtype=author&query=Xiang%2C+S">Shiming Xiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.11925v1-abstract-short" style="display: inline;"> Continuous-valued Autoregressive (AR) image generation models have demonstrated notable superiority over their discrete-token counterparts, showcasing considerable reconstruction quality and higher generation fidelity. However, the computational demands of the autoregressive framework result in significant inference overhead. While speculative decoding has proven effective in accelerating Large La… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11925v1-abstract-full').style.display = 'inline'; document.getElementById('2411.11925v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.11925v1-abstract-full" style="display: none;"> Continuous-valued Autoregressive (AR) image generation models have demonstrated notable superiority over their discrete-token counterparts, showcasing considerable reconstruction quality and higher generation fidelity. However, the computational demands of the autoregressive framework result in significant inference overhead. 
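To illustrate what "learning the fusion operation" can look like in general, here is a toy PyTorch sketch that softly mixes a few candidate fusion operations via trainable logits; the candidate ops, sizes, and relaxation are my assumptions and not OptFusion's actual search space or algorithm.

```python
import torch
import torch.nn as nn

class LearnableFusion(nn.Module):
    """Softly mixes candidate fusion operations; the mixing logits `alpha`
    are trained jointly with the rest of the network (one-shot style)."""
    def __init__(self, dim):
        super().__init__()
        self.concat_proj = nn.Linear(2 * dim, dim)
        self.gate = nn.Sequential(nn.Linear(2 * dim, dim), nn.Sigmoid())
        self.alpha = nn.Parameter(torch.zeros(3))  # logits over 3 candidate ops

    def forward(self, x, y):
        cat = torch.cat([x, y], dim=-1)
        g = self.gate(cat)
        candidates = torch.stack([
            x + y,                  # additive fusion
            self.concat_proj(cat),  # concat + linear fusion
            g * x + (1 - g) * y,    # gated fusion
        ])
        w = torch.softmax(self.alpha, dim=0).view(-1, 1, 1)
        return (w * candidates).sum(dim=0)  # argmax(alpha) picks the op at the end

fusion = LearnableFusion(dim=8)
out = fusion(torch.randn(4, 8), torch.randn(4, 8))
print(out.shape)  # torch.Size([4, 8])
```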
arXiv:2411.11925 [cs.CV] - abs: https://arxiv.org/abs/2411.11925 - pdf: https://arxiv.org/pdf/2411.11925
Continuous Speculative Decoding for Autoregressive Image Generation
Authors: Zili Wang, Robert Zhang, Kun Ding, Qi Yang, Fei Li, Shiming Xiang
Abstract: Continuous-valued Autoregressive (AR) image generation models have demonstrated notable superiority over their discrete-token counterparts, showcasing considerable reconstruction quality and higher generation fidelity. However, the computational demands of the autoregressive framework result in significant inference overhead. While speculative decoding has proven effective in accelerating Large Language Models (LLMs), its adaptation to continuous-valued visual autoregressive models remains unexplored. This work generalizes the speculative decoding algorithm from discrete tokens to continuous space. By analyzing the intrinsic properties of the output distribution, we establish a tailored acceptance criterion for the diffusion distributions prevalent in such models. To overcome the inconsistency that arises in speculative decoding output distributions, we introduce denoising trajectory alignment and token pre-filling methods. Additionally, we identify the hard-to-sample distribution in the rejection phase. To mitigate this issue, we propose a meticulous acceptance-rejection sampling method with a proper upper bound, thereby circumventing complex integration. Experimental results show that our continuous speculative decoding achieves a remarkable $2.33\times$ speed-up on off-the-shelf models while maintaining the output distribution. Code will be available at https://github.com/MarkXCloud/CSpD.
Submitted 18 November, 2024; originally announced November 2024.
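The core acceptance test carried from discrete to continuous speculative decoding is: accept a draft sample x ~ q with probability min(1, p_target(x) / p_draft(x)). Below is a minimal sketch of that test; the Gaussian densities stand in for the diffusion-based output distributions mentioned in the abstract, and the residual-resampling step on rejection (the hard part the paper addresses) is omitted.

```python
import math, random

def normal_pdf(x, mu, sigma):
    return math.exp(-0.5 * ((x - mu) / sigma) ** 2) / (sigma * math.sqrt(2 * math.pi))

def speculative_accept(x, target_pdf, draft_pdf, rng=random):
    # Accept with probability min(1, p_target(x) / p_draft(x)).
    ratio = target_pdf(x) / max(draft_pdf(x), 1e-12)
    return rng.random() < min(1.0, ratio)

target = lambda x: normal_pdf(x, mu=0.0, sigma=1.0)  # verifier's density
draft  = lambda x: normal_pdf(x, mu=0.3, sigma=1.2)  # cheaper draft density

random.seed(0)
draws = [random.gauss(0.3, 1.2) for _ in range(10000)]  # samples from the draft
accepted = [x for x in draws if speculative_accept(x, target, draft)]
# On rejection, a full implementation would resample from the residual
# distribution so that accepted-plus-resampled outputs follow the target.
print(f"accepted {len(accepted)} of {len(draws)} draft samples")
```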
arXiv:2410.19265 [cs.LG] - abs: https://arxiv.org/abs/2410.19265 - pdf: https://arxiv.org/pdf/2410.19265
A Survey of Deep Graph Learning under Distribution Shifts: from Graph Out-of-Distribution Generalization to Adaptation
Authors: Kexin Zhang, Shuhan Liu, Song Wang, Weili Shi, Chen Chen, Pan Li, Sheng Li, Jundong Li, Kaize Ding
Abstract: Distribution shifts on graphs, that is, discrepancies in data distribution between training and employing a graph machine learning model, are ubiquitous and often unavoidable in real-world scenarios. These shifts may severely deteriorate model performance, posing significant challenges for reliable graph machine learning. Consequently, there has been a surge in research on graph machine learning under distribution shifts, aiming to train models to achieve satisfactory performance on out-of-distribution (OOD) test data. In our survey, we provide an up-to-date and forward-looking review of deep graph learning under distribution shifts. Specifically, we cover three primary scenarios: graph OOD generalization, training-time graph OOD adaptation, and test-time graph OOD adaptation. We begin by formally formulating the problems and discussing various types of distribution shifts that can affect graph learning, such as covariate shifts and concept shifts. To provide a better understanding of the literature, we systematically categorize the existing models based on our proposed taxonomy and investigate the techniques they adopt. We also summarize commonly used datasets in this research area to facilitate further investigation. Finally, we point out promising research directions and the corresponding challenges to encourage further study in this vital domain. Additionally, we provide a continuously updated reading list at https://github.com/kaize0409/Awesome-Graph-OOD.
Submitted 24 October, 2024; originally announced October 2024.
Comments: 18 pages, 2 figures. arXiv admin note: text overlap with arXiv:2402.11153

arXiv:2410.16386 [cs.LG, cs.SI] - abs: https://arxiv.org/abs/2410.16386 - pdf: https://arxiv.org/pdf/2410.16386
LEGO-Learn: Label-Efficient Graph Open-Set Learning
Authors: Haoyan Xu, Kay Liu, Zhengtao Yao, Philip S. Yu, Kaize Ding, Yue Zhao
Abstract: How can we train graph-based models to recognize unseen classes while keeping labeling costs low? Graph open-set learning (GOL) and out-of-distribution (OOD) detection aim to address this challenge by training models that can accurately classify known, in-distribution (ID) classes while identifying and handling previously unseen classes during inference. This capability is critical for high-stakes, real-world applications where models frequently encounter unexpected data, including finance, security, and healthcare. However, current GOL methods assume access to many labeled ID samples, which is unrealistic for large-scale graphs due to high annotation costs. In this paper, we propose LEGO-Learn (Label-Efficient Graph Open-set Learning), a novel framework that tackles open-set node classification on graphs within a given label budget by selecting the most informative ID nodes. LEGO-Learn employs a GNN-based filter to identify and exclude potential OOD nodes and then selects highly informative ID nodes for labeling using the K-Medoids algorithm. To prevent the filter from discarding valuable ID examples, we introduce a classifier that differentiates between the C known ID classes and an additional class representing OOD nodes (hence, a C+1 classifier). This classifier uses a weighted cross-entropy loss to balance the removal of OOD nodes while retaining informative ID nodes. Experimental results on four real-world datasets demonstrate that LEGO-Learn significantly outperforms leading methods, with up to a 6.62% improvement in ID classification accuracy and a 7.49% increase in AUROC for OOD detection.
Submitted 21 October, 2024; originally announced October 2024.
Comments: Preprint. Under review
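A minimal PyTorch sketch of the C+1 classification idea from the abstract above: C known ID classes plus one extra OOD class, trained with a class-weighted cross-entropy so the filter is not overly eager to discard ID nodes. The linear head, weights, and sizes are illustrative stand-ins (a GNN encoder and the K-Medoids selection step are omitted).

```python
import torch
import torch.nn as nn

C = 5                                        # known ID classes
num_nodes, feat_dim = 32, 16
logits_head = nn.Linear(feat_dim, C + 1)     # stands in for a GNN + linear head

# Down-weight the OOD class (index C) so informative ID nodes are retained.
class_weights = torch.ones(C + 1)
class_weights[C] = 0.5
criterion = nn.CrossEntropyLoss(weight=class_weights)

node_feats = torch.randn(num_nodes, feat_dim)
labels = torch.randint(0, C + 1, (num_nodes,))   # (C+1)-way supervision
loss = criterion(logits_head(node_feats), labels)
loss.backward()

# Nodes predicted as one of the C ID classes would then be candidates for
# K-Medoids-based selection under the labeling budget.
with torch.no_grad():
    pred = logits_head(node_feats).argmax(dim=1)
    id_candidates = (pred < C).nonzero(as_tuple=True)[0]
print(len(id_candidates), "candidate ID nodes")
```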
arXiv:2410.11686 [cs.CV] - abs: https://arxiv.org/abs/2410.11686 - pdf: https://arxiv.org/pdf/2410.11686
A Survey of Low-shot Vision-Language Model Adaptation via Representer Theorem
Authors: Kun Ding, Ying Wang, Gaofeng Meng, Shiming Xiang
Abstract: The advent of pre-trained vision-language foundation models has revolutionized the field of zero/few-shot (i.e., low-shot) image recognition. The key challenge to address under the condition of limited training data is how to fine-tune pre-trained vision-language models in a parameter-efficient manner. Previously, numerous approaches tackling this challenge have been proposed. Meanwhile, a few survey papers have also been published to summarize these works. However, a unified computational framework that integrates existing methods, identifies their nature, and supports in-depth comparison is still lacking. As such, this survey paper first proposes a unified computational framework from the perspective of the Representer Theorem and then derives many of the existing methods by specializing this framework. Thereafter, a comparative analysis is conducted to uncover the differences and relationships between existing methods. Based on the analyses, some possible variants to improve the existing works are presented. As a demonstration, we extend existing methods by modeling inter-class correlation between representers in reproducing kernel Hilbert space (RKHS), which is implemented by exploiting the closed-form solution of kernel ridge regression. Extensive experiments on 11 datasets are conducted to validate the effectiveness of this method. Toward the end of this paper, we discuss the limitations and provide further research directions.
Submitted 15 October, 2024; originally announced October 2024.
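The representer-theorem template the abstract refers to reduces, in its simplest form, to kernel ridge regression solved in closed form: alpha = (K + lambda I)^{-1} Y and f(x) = k(x, X) alpha. Here is a small numpy sketch under assumed toy data and an RBF kernel; the paper's framework and its inter-class extension go beyond this basic form.

```python
import numpy as np

def rbf_kernel(A, B, gamma=0.5):
    sq = ((A[:, None, :] - B[None, :, :]) ** 2).sum(-1)
    return np.exp(-gamma * sq)

def fit_kernel_ridge(X, Y, lam=0.1, gamma=0.5):
    K = rbf_kernel(X, X, gamma)
    alpha = np.linalg.solve(K + lam * np.eye(len(X)), Y)   # closed-form solution
    return alpha

def predict(X_train, alpha, X_test, gamma=0.5):
    return rbf_kernel(X_test, X_train, gamma) @ alpha

# Toy few-shot setup: 8 support features, 3 classes with one-hot targets.
rng = np.random.default_rng(0)
X = rng.normal(size=(8, 4))
Y = np.eye(3)[rng.integers(0, 3, size=8)]
alpha = fit_kernel_ridge(X, Y)
print(predict(X, alpha, rng.normal(size=(2, 4))).shape)   # (2, 3) class scores
```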
arXiv:2410.08895 [cs.CV] - abs: https://arxiv.org/abs/2410.08895 - pdf: https://arxiv.org/pdf/2410.08895
Calibrated Cache Model for Few-Shot Vision-Language Model Adaptation
Authors: Kun Ding, Qiang Yu, Haojian Zhang, Gaofeng Meng, Shiming Xiang
Abstract: Cache-based approaches stand out as both effective and efficient for adapting vision-language models (VLMs). Nonetheless, the existing cache model overlooks three crucial aspects. 1) Pre-trained VLMs are mainly optimized for image-text similarity, neglecting the importance of image-image similarity, leading to a gap between pre-training and adaptation. 2) The current cache model is based on the Nadaraya-Watson (N-W) estimator, which disregards the intricate relationships among training samples while constructing the weight function. 3) Under the condition of limited samples, the logits generated by the cache model are highly uncertain; directly using these logits without accounting for their confidence could be problematic. This work presents three calibration modules aimed at addressing the above challenges. Similarity Calibration refines the image-image similarity by using unlabeled images. We add a learnable projection layer with a residual connection on top of the pre-trained image encoder of CLIP and optimize the parameters by minimizing a self-supervised contrastive loss. Weight Calibration introduces a precision matrix into the weight function to adequately model the relation between training samples, transforming the existing cache model into a Gaussian Process (GP) regressor, which could be more accurate than the N-W estimator. Confidence Calibration leverages the predictive variances computed by GP regression to dynamically re-scale the logits of the cache model, ensuring that the cache model's outputs are appropriately adjusted based on their confidence levels. Besides, to reduce the high complexity of GPs, we further propose a group-based learning strategy. Integrating the above designs, we propose both training-free and training-required variants. Extensive experiments on 11 few-shot classification datasets validate that the proposed methods can achieve state-of-the-art performance.
Submitted 11 October, 2024; originally announced October 2024.
Comments: submitted to IJCV
arXiv:2410.07919 [cs.CL, q-bio.BM] - abs: https://arxiv.org/abs/2410.07919 - pdf: https://arxiv.org/pdf/2410.07919
InstructBioMol: Advancing Biomolecule Understanding and Design Following Human Instructions
Authors: Xiang Zhuang, Keyan Ding, Tianwen Lyu, Yinuo Jiang, Xiaotong Li, Zhuoyi Xiang, Zeyuan Wang, Ming Qin, Kehua Feng, Jike Wang, Qiang Zhang, Huajun Chen
Abstract: Understanding and designing biomolecules, such as proteins and small molecules, is central to advancing drug discovery, synthetic biology, and enzyme engineering. Recent breakthroughs in Artificial Intelligence (AI) have revolutionized biomolecular research, achieving remarkable accuracy in biomolecular prediction and design. However, a critical gap remains between AI's computational power and researchers' intuition in using natural language to align molecular complexity with human intentions. Large Language Models (LLMs) have shown potential to interpret human intentions, yet their application to biomolecular research remains nascent due to challenges including specialized knowledge requirements, multimodal data integration, and semantic alignment between natural language and biomolecules. To address these limitations, we present InstructBioMol, a novel LLM designed to bridge natural language and biomolecules through a comprehensive any-to-any alignment of natural language, molecules, and proteins. This model can integrate multimodal biomolecules as input and enables researchers to articulate design goals in natural language, providing biomolecular outputs that meet precise biological needs. Experimental results demonstrate that InstructBioMol can understand and design biomolecules following human instructions. Notably, it can generate drug molecules with a 10% improvement in binding affinity and design enzymes that achieve an ESP Score of 70.4, making it the only method to surpass the enzyme-substrate interaction threshold of 60.0 recommended by the ESP developer. This highlights its potential to transform real-world biomolecular research.
Submitted 10 October, 2024; originally announced October 2024.
arXiv:2410.07074 [cs.LG] - abs: https://arxiv.org/abs/2410.07074 - pdf: https://arxiv.org/pdf/2410.07074
Let's Ask GNN: Empowering Large Language Model for Graph In-Context Learning
Authors: Zhengyu Hu, Yichuan Li, Zhengyu Chen, Jingang Wang, Han Liu, Kyumin Lee, Kaize Ding
Abstract: Textual Attributed Graphs (TAGs) are crucial for modeling complex real-world systems, yet leveraging large language models (LLMs) for TAGs presents unique challenges due to the gap between sequential text processing and graph-structured data. We introduce AskGNN, a novel approach that bridges this gap by leveraging In-Context Learning (ICL) to integrate graph data and task-specific information into LLMs. AskGNN employs a Graph Neural Network (GNN)-powered structure-enhanced retriever to select labeled nodes across graphs, incorporating complex graph structures and their supervision signals. Our learning-to-retrieve algorithm optimizes the retriever to select example nodes that maximize LLM performance on graph tasks. Experiments across three tasks and seven LLMs demonstrate AskGNN's superior effectiveness in graph task performance, opening new avenues for applying LLMs to graph-structured data without extensive fine-tuning.
Submitted 8 January, 2025; v1 submitted 9 October, 2024; originally announced October 2024.
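A toy sketch of the retrieve-then-prompt pattern described above: a scorer ranks labeled nodes, the top-k become in-context demonstrations, and the assembled prompt goes to an LLM. The scorer here is a simple stub (node degree); in the paper it is a GNN trained with a learning-to-retrieve objective, and the prompt format is my assumption.

```python
def retrieve_examples(labeled_nodes, score_fn, k=3):
    ranked = sorted(labeled_nodes, key=score_fn, reverse=True)
    return ranked[:k]

def build_prompt(examples, query_text):
    demos = "\n".join(f"Node: {e['text']}\nLabel: {e['label']}" for e in examples)
    return f"{demos}\nNode: {query_text}\nLabel:"

labeled_nodes = [
    {"text": "cites many GNN papers", "label": "ML", "degree": 12},
    {"text": "discusses protein folding", "label": "Bio", "degree": 3},
    {"text": "benchmarks CNN training", "label": "ML", "degree": 7},
]
# Stand-in score: node degree; a trained GNN retriever would replace this.
examples = retrieve_examples(labeled_nodes, score_fn=lambda n: n["degree"], k=2)
print(build_prompt(examples, "introduces a new graph transformer"))
```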
arXiv:2410.03769 [cs.CL, cs.AI, cs.CR] - abs: https://arxiv.org/abs/2410.03769 - pdf: https://arxiv.org/pdf/2410.03769
SciSafeEval: A Comprehensive Benchmark for Safety Alignment of Large Language Models in Scientific Tasks
Authors: Tianhao Li, Jingyu Lu, Chuangxin Chu, Tianyu Zeng, Yujia Zheng, Mei Li, Haotian Huang, Bin Wu, Zuoxian Liu, Kai Ma, Xuejing Yuan, Xingkai Wang, Keyan Ding, Huajun Chen, Qiang Zhang
Abstract: Large language models (LLMs) have a transformative impact on a variety of scientific tasks across disciplines including biology, chemistry, medicine, and physics. However, ensuring the safety alignment of these models in scientific research remains an underexplored area, with existing benchmarks primarily focusing on textual content and overlooking key scientific representations such as molecular, protein, and genomic languages. Moreover, the safety mechanisms of LLMs in scientific tasks are insufficiently studied. To address these limitations, we introduce SciSafeEval, a comprehensive benchmark designed to evaluate the safety alignment of LLMs across a range of scientific tasks. SciSafeEval spans multiple scientific languages, including textual, molecular, protein, and genomic, and covers a wide range of scientific domains. We evaluate LLMs in zero-shot, few-shot, and chain-of-thought settings, and introduce a "jailbreak" enhancement feature that challenges LLMs equipped with safety guardrails, rigorously testing their defenses against malicious intent. Our benchmark surpasses existing safety datasets in both scale and scope, providing a robust platform for assessing the safety and performance of LLMs in scientific contexts. This work aims to facilitate the responsible development and deployment of LLMs, promoting alignment with safety and ethical standards in scientific research.
Submitted 16 December, 2024; v1 submitted 2 October, 2024; originally announced October 2024.
arXiv:2410.02694 [cs.CL, cs.AI] - abs: https://arxiv.org/abs/2410.02694 - pdf: https://arxiv.org/pdf/2410.02694
HELMET: How to Evaluate Long-Context Language Models Effectively and Thoroughly
Authors: Howard Yen, Tianyu Gao, Minmin Hou, Ke Ding, Daniel Fleischer, Peter Izsak, Moshe Wasserblat, Danqi Chen
Abstract: There have been many benchmarks for evaluating long-context language models (LCLMs), but developers often rely on synthetic tasks like needle-in-a-haystack (NIAH) or arbitrary subsets of tasks. It remains unclear whether they translate to the diverse downstream applications of LCLMs, and the inconsistency further complicates model comparison. We investigate the underlying reasons behind current practices and find that existing benchmarks often provide noisy signals due to low coverage of applications, insufficient lengths, unreliable metrics, and incompatibility with base models. In this work, we present HELMET (How to Evaluate Long-context Models Effectively and Thoroughly), a comprehensive benchmark encompassing seven diverse, application-centric categories. We also address many issues in previous benchmarks by adding controllable lengths up to 128k tokens, model-based evaluation for reliable metrics, and few-shot prompting for robustly evaluating base models. Consequently, we demonstrate that HELMET offers more reliable and consistent rankings of frontier LCLMs. Through a comprehensive study of 51 LCLMs, we find that (1) synthetic tasks like NIAH are not good predictors of downstream performance; (2) the diverse categories in HELMET exhibit distinct trends and low correlation with each other; and (3) while most LCLMs achieve perfect NIAH scores, open-source models significantly lag behind closed ones when the task requires full-context reasoning or following complex instructions, and the gap widens with increased lengths. Finally, we recommend using our RAG tasks for fast model development, as they are easy to run and more predictive of other downstream performance; ultimately, we advocate for a holistic evaluation across diverse tasks.
Submitted 10 October, 2024; v1 submitted 3 October, 2024; originally announced October 2024.
Comments: Code and data are available here: https://github.com/princeton-nlp/HELMET

arXiv:2409.16278 [cs.CV] - abs: https://arxiv.org/abs/2409.16278 - pdf: https://arxiv.org/pdf/2409.16278
Adapting Vision-Language Model with Fine-grained Semantics for Open-Vocabulary Segmentation
Authors: Yong Xien Chng, Xuchong Qiu, Yizeng Han, Kai Ding, Wan Ding, Gao Huang
Abstract: Despite extensive research, open-vocabulary segmentation methods still struggle to generalize across diverse domains. To reduce the computational cost of adapting Vision-Language Models (VLMs) while preserving their pre-trained knowledge, most methods freeze the VLMs for mask classification and train only the mask generator. However, our comprehensive analysis reveals a surprising insight: open-vocabulary segmentation is primarily bottlenecked by mask classification, not mask generation. This discovery prompts us to rethink the existing paradigm and explore an alternative approach. Instead of freezing the VLM, we propose to freeze the pre-trained mask generator and focus on optimizing the mask classifier. Building on the observation that VLMs pre-trained on global-pooled image-text features often fail to capture fine-grained semantics necessary for effective mask classification, we propose a novel Fine-grained Semantic Adaptation (FISA) method to address this limitation. FISA enhances the extracted visual features with fine-grained semantic awareness by explicitly integrating this crucial semantic information early in the visual encoding process.
As our method strategically optimizes only a small portion of the VLM's parameters, it enjoys the efficiency of adapting to new data distributions while largely preserving the valuable VLM pre-trained knowledge. Extensive ablation studies confirm the superiority of our approach. Notably, FISA achieves new state-of-the-art results across multiple representative benchmarks, improving performance by up to +1.0 PQ and +3.0 mIoU and reducing training costs by nearly 5x compared to previous best methods. Our code and data will be made public. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 10 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.06702">arXiv:2409.06702</a> <span> [<a href="https://arxiv.org/pdf/2409.06702">pdf</a>, <a href="https://arxiv.org/format/2409.06702">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Hint-AD: Holistically Aligned Interpretability in End-to-End Autonomous Driving </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ding%2C+K">Kairui Ding</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+B">Boyuan Chen</a>, <a href="/search/cs?searchtype=author&query=Su%2C+Y">Yuchen Su</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+H">Huan-ang Gao</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+B">Bu Jin</a>, <a href="/search/cs?searchtype=author&query=Sima%2C+C">Chonghao Sima</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Wuqiang Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiaohui Li</a>, <a href="/search/cs?searchtype=author&query=Barsch%2C+P">Paul Barsch</a>, <a href="/search/cs?searchtype=author&query=Li%2C+H">Hongyang Li</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+H">Hao Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.06702v1-abstract-short" style="display: inline;"> End-to-end architectures in autonomous driving (AD) face a significant challenge in interpretability, impeding human-AI trust. Human-friendly natural language has been explored for tasks such as driving explanation and 3D captioning. 
However, previous works primarily focused on the paradigm of declarative interpretability, where the natural language interpretations are not grounded in the intermed… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.06702v1-abstract-full').style.display = 'inline'; document.getElementById('2409.06702v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.06702v1-abstract-full" style="display: none;"> End-to-end architectures in autonomous driving (AD) face a significant challenge in interpretability, impeding human-AI trust. Human-friendly natural language has been explored for tasks such as driving explanation and 3D captioning. However, previous works primarily focused on the paradigm of declarative interpretability, where the natural language interpretations are not grounded in the intermediate outputs of AD systems, making the interpretations only declarative. In contrast, aligned interpretability establishes a connection between language and the intermediate outputs of AD systems. Here we introduce Hint-AD, an integrated AD-language system that generates language aligned with the holistic perception-prediction-planning outputs of the AD model. By incorporating the intermediate outputs and a holistic token mixer sub-network for effective feature adaptation, Hint-AD achieves desirable accuracy, achieving state-of-the-art results in driving language tasks including driving explanation, 3D dense captioning, and command prediction. To facilitate further study on driving explanation task on nuScenes, we also introduce a human-labeled dataset, Nu-X. Codes, dataset, and models will be publicly available. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.06702v1-abstract-full').style.display = 'none'; document.getElementById('2409.06702v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
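To make the aligned-interpretability idea above more concrete, here is a hypothetical PyTorch sketch of a token mixer that projects intermediate perception, prediction, and planning features into one token sequence that a caption decoder could attend to. The module name, feature sizes, and layer choices are assumptions for illustration, not Hint-AD's actual architecture.

```python
# Hypothetical sketch; sizes and module names are illustrative assumptions.
import torch
import torch.nn as nn

class HolisticTokenMixer(nn.Module):
    def __init__(self, d_model=256, n_heads=4):
        super().__init__()
        self.proj = nn.ModuleDict({              # one projection per intermediate stream
            "perception": nn.Linear(128, d_model),
            "prediction": nn.Linear(64, d_model),
            "planning": nn.Linear(32, d_model),
        })
        self.mixer = nn.TransformerEncoderLayer(d_model, n_heads, batch_first=True)

    def forward(self, streams):
        # streams: dict of (batch, tokens, feat) tensors taken from the AD model
        tokens = torch.cat([self.proj[name](x) for name, x in streams.items()], dim=1)
        return self.mixer(tokens)                # mixed tokens for a language head

mixer = HolisticTokenMixer()
streams = {
    "perception": torch.randn(2, 10, 128),
    "prediction": torch.randn(2, 6, 64),
    "planning": torch.randn(2, 4, 32),
}
print(mixer(streams).shape)                      # torch.Size([2, 20, 256])
```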
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">CoRL 2024, Project Page: https://air-discover.github.io/Hint-AD/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.05344">arXiv:2409.05344</a> <span> [<a href="https://arxiv.org/pdf/2409.05344">pdf</a>, <a href="https://arxiv.org/format/2409.05344">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/LRA.2024.3468161">10.1109/LRA.2024.3468161 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> GOPT: Generalizable Online 3D Bin Packing via Transformer-based Deep Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xiong%2C+H">Heng Xiong</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+C">Changrong Guo</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+J">Jian Peng</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+K">Kai Ding</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+W">Wenjie Chen</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+X">Xuchong Qiu</a>, <a href="/search/cs?searchtype=author&query=Bai%2C+L">Long Bai</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+J">Jianfeng Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.05344v2-abstract-short" style="display: inline;"> Robotic object packing has broad practical applications in the logistics and automation industry, often formulated by researchers as the online 3D Bin Packing Problem (3D-BPP). However, existing DRL-based methods primarily focus on enhancing performance in limited packing environments while neglecting the ability to generalize across multiple environments characterized by different bin dimensions.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.05344v2-abstract-full').style.display = 'inline'; document.getElementById('2409.05344v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.05344v2-abstract-full" style="display: none;"> Robotic object packing has broad practical applications in the logistics and automation industry, often formulated by researchers as the online 3D Bin Packing Problem (3D-BPP). However, existing DRL-based methods primarily focus on enhancing performance in limited packing environments while neglecting the ability to generalize across multiple environments characterized by different bin dimensions. To this end, we propose GOPT, a generalizable online 3D Bin Packing approach via Transformer-based deep reinforcement learning (DRL). 
First, we design a Placement Generator module to yield finite subspaces as placement candidates and the representation of the bin. Second, we propose a Packing Transformer, which fuses the features of the items and bin, to identify the spatial correlation between the item to be packed and available sub-spaces within the bin. Coupling these two components enables GOPT's ability to perform inference on bins of varying dimensions. We conduct extensive experiments and demonstrate that GOPT not only achieves superior performance against the baselines, but also exhibits excellent generalization capabilities. Furthermore, the deployment with a robot showcases the practical applicability of our method in the real world. The source code will be publicly available at https://github.com/Xiong5Heng/GOPT. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.05344v2-abstract-full').style.display = 'none'; document.getElementById('2409.05344v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 6 figures. This paper has been accepted by IEEE Robotics and Automation Letters</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.04777">arXiv:2409.04777</a> <span> [<a href="https://arxiv.org/pdf/2409.04777">pdf</a>, <a href="https://arxiv.org/format/2409.04777">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> </div> </div> <p class="title is-5 mathjax"> Optimization Hyper-parameter Laws for Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xie%2C+X">Xingyu Xie</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+K">Kuangyu Ding</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+S">Shuicheng Yan</a>, <a href="/search/cs?searchtype=author&query=Toh%2C+K">Kim-Chuan Toh</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+T">Tianwen Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.04777v3-abstract-short" style="display: inline;"> Large Language Models have driven significant AI advancements, yet their training is resource-intensive and highly sensitive to hyper-parameter selection. While scaling laws provide valuable guidance on model size and data requirements, they fall short in choosing dynamic hyper-parameters, such as learning-rate (LR) schedules, that evolve during training. 
To bridge this gap, we present Optimizatio… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.04777v3-abstract-full').style.display = 'inline'; document.getElementById('2409.04777v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.04777v3-abstract-full" style="display: none;"> Large Language Models have driven significant AI advancements, yet their training is resource-intensive and highly sensitive to hyper-parameter selection. While scaling laws provide valuable guidance on model size and data requirements, they fall short in choosing dynamic hyper-parameters, such as learning-rate (LR) schedules, that evolve during training. To bridge this gap, we present Optimization Hyper-parameter Laws (Opt-Laws), a framework that effectively captures the relationship between hyper-parameters and training outcomes, enabling the pre-selection of potential optimal schedules. Grounded in stochastic differential equations, Opt-Laws introduce novel mathematical interpretability and offer a robust theoretical foundation for some popular LR schedules. Our extensive validation across diverse model sizes and data scales demonstrates Opt-Laws' ability to accurately predict training loss and identify optimal LR schedule candidates in pre-training, continual training, and fine-tuning scenarios. This approach significantly reduces computational costs while enhancing overall model performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.04777v3-abstract-full').style.display = 'none'; document.getElementById('2409.04777v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.01980">arXiv:2409.01980</a> <span> [<a href="https://arxiv.org/pdf/2409.01980">pdf</a>, <a href="https://arxiv.org/format/2409.01980">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Large Language Models for Anomaly and Out-of-Distribution Detection: A Survey </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+R">Ruiyao Xu</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+K">Kaize Ding</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.01980v3-abstract-short" style="display: inline;"> Detecting anomalies or out-of-distribution (OOD) samples is critical for maintaining the reliability and trustworthiness of machine learning systems. Recently, Large Language Models (LLMs) have demonstrated their effectiveness not only in natural language processing but also in broader applications due to their advanced comprehension and generative capabilities. 
The integration of LLMs into anomal… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.01980v3-abstract-full').style.display = 'inline'; document.getElementById('2409.01980v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.01980v3-abstract-full" style="display: none;"> Detecting anomalies or out-of-distribution (OOD) samples is critical for maintaining the reliability and trustworthiness of machine learning systems. Recently, Large Language Models (LLMs) have demonstrated their effectiveness not only in natural language processing but also in broader applications due to their advanced comprehension and generative capabilities. The integration of LLMs into anomaly and OOD detection marks a significant shift from the traditional paradigm in the field. This survey focuses on the problem of anomaly and OOD detection under the context of LLMs. We propose a new taxonomy to categorize existing approaches into two classes based on the role played by LLMs. Following our proposed taxonomy, we further discuss the related work under each of the categories and finally discuss potential challenges and directions for future research in this field. We also provide an up-to-date reading list of relevant papers. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.01980v3-abstract-full').style.display = 'none'; document.getElementById('2409.01980v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
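As generic background for the paradigm the survey studies, the snippet below shows one deliberately simple way to use an LLM as a zero-shot anomaly scorer through prompting. It is an illustration of the general idea only, not a method or taxonomy taken from the survey, and `call_llm` is a hypothetical stand-in for whatever completion client is available.

```python
# Generic illustration; `call_llm` is a hypothetical stand-in, not a real API.
def build_prompt(record: str) -> str:
    return (
        "You are an anomaly detector. Rate how anomalous the following record is "
        "on a scale from 0 (normal) to 1 (clearly anomalous). "
        "Answer with a single number.\n\n"
        f"Record: {record}"
    )

def score_record(record: str, call_llm) -> float:
    reply = call_llm(build_prompt(record))
    try:
        return min(max(float(reply.strip()), 0.0), 1.0)   # clamp to [0, 1]
    except ValueError:
        return float("nan")                               # unparsable reply

# Dummy model that flags records mentioning "segfault", just to make it runnable.
dummy = lambda prompt: "0.9" if "segfault" in prompt else "0.1"
print(score_record("worker 3 exited with segfault at 02:14", dummy))
```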
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to NAACL 2025 Findings</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.04315">arXiv:2408.04315</a> <span> [<a href="https://arxiv.org/pdf/2408.04315">pdf</a>, <a href="https://arxiv.org/format/2408.04315">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Federated Cubic Regularized Newton Learning with Sparsification-amplified Differential Privacy </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huo%2C+W">Wei Huo</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+C">Changxin Liu</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+K">Kemi Ding</a>, <a href="/search/cs?searchtype=author&query=Johansson%2C+K+H">Karl Henrik Johansson</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+L">Ling Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.04315v1-abstract-short" style="display: inline;"> This paper investigates the use of the cubic-regularized Newton method within a federated learning framework while addressing two major concerns that commonly arise in federated learning: privacy leakage and communication bottleneck. We introduce a federated learning algorithm called Differentially Private Federated Cubic Regularized Newton (DP-FCRN). By leveraging second-order techniques, our alg… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.04315v1-abstract-full').style.display = 'inline'; document.getElementById('2408.04315v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.04315v1-abstract-full" style="display: none;"> This paper investigates the use of the cubic-regularized Newton method within a federated learning framework while addressing two major concerns that commonly arise in federated learning: privacy leakage and communication bottleneck. We introduce a federated learning algorithm called Differentially Private Federated Cubic Regularized Newton (DP-FCRN). By leveraging second-order techniques, our algorithm achieves lower iteration complexity compared to first-order methods. We also incorporate noise perturbation during local computations to ensure privacy. Furthermore, we employ sparsification in uplink transmission, which not only reduces the communication costs but also amplifies the privacy guarantee. Specifically, this approach reduces the necessary noise intensity without compromising privacy protection. We analyze the convergence properties of our algorithm and establish the privacy guarantee. Finally, we validate the effectiveness of the proposed algorithm through experiments on a benchmark dataset. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.04315v1-abstract-full').style.display = 'none'; document.getElementById('2408.04315v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.11282">arXiv:2407.11282</a> <span> [<a href="https://arxiv.org/pdf/2407.11282">pdf</a>, <a href="https://arxiv.org/format/2407.11282">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Uncertainty is Fragile: Manipulating Uncertainty in Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zeng%2C+Q">Qingcheng Zeng</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+M">Mingyu Jin</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+Q">Qinkai Yu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zhenting Wang</a>, <a href="/search/cs?searchtype=author&query=Hua%2C+W">Wenyue Hua</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Z">Zihao Zhou</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+G">Guangyan Sun</a>, <a href="/search/cs?searchtype=author&query=Meng%2C+Y">Yanda Meng</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+S">Shiqing Ma</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Q">Qifan Wang</a>, <a href="/search/cs?searchtype=author&query=Juefei-Xu%2C+F">Felix Juefei-Xu</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+K">Kaize Ding</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+F">Fan Yang</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+R">Ruixiang Tang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yongfeng Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.11282v3-abstract-short" style="display: inline;"> Large Language Models (LLMs) are employed across various high-stakes domains, where the reliability of their outputs is crucial. One commonly used method to assess the reliability of LLMs' responses is uncertainty estimation, which gauges the likelihood of their answers being correct. While many studies focus on improving the accuracy of uncertainty estimations for LLMs, our research investigates… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.11282v3-abstract-full').style.display = 'inline'; document.getElementById('2407.11282v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.11282v3-abstract-full" style="display: none;"> Large Language Models (LLMs) are employed across various high-stakes domains, where the reliability of their outputs is crucial. One commonly used method to assess the reliability of LLMs' responses is uncertainty estimation, which gauges the likelihood of their answers being correct. 
While many studies focus on improving the accuracy of uncertainty estimations for LLMs, our research investigates the fragility of uncertainty estimation and explores potential attacks. We demonstrate that an attacker can embed a backdoor in LLMs, which, when activated by a specific trigger in the input, manipulates the model's uncertainty without affecting the final output. Specifically, the proposed backdoor attack method can alter an LLM's output probability distribution, causing the probability distribution to converge towards an attacker-predefined distribution while ensuring that the top-1 prediction remains unchanged. Our experimental results demonstrate that this attack effectively undermines the model's self-evaluation reliability in multiple-choice questions. For instance, we achieved a 100% attack success rate (ASR) across three different triggering strategies in four models. Further, we investigate whether this manipulation generalizes across different prompts and domains. This work highlights a significant threat to the reliability of LLMs and underscores the need for future defenses against such attacks. The code is available at https://github.com/qcznlp/uncertainty_attack. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.03937">arXiv:2407.03937</a> <span> [<a href="https://arxiv.org/pdf/2407.03937">pdf</a>, <a href="https://arxiv.org/format/2407.03937">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> TongGu: Mastering Classical Chinese Understanding with Knowledge-Grounded Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cao%2C+J">Jiahuan Cao</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+D">Dezhi Peng</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+P">Peirong Zhang</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+Y">Yongxin Shi</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yang Liu</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+K">Kai Ding</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+L">Lianwen Jin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2407.03937v2-abstract-full" style="display: inline;"> Classical Chinese is a gateway to the rich heritage and wisdom of ancient China, yet its complexities pose formidable comprehension barriers for most modern people without specialized knowledge. While Large Language Models (LLMs) have shown remarkable capabilities in Natural Language Processing (NLP), they struggle with Classical Chinese Understanding (CCU), especially in data-demanding and knowledge-intensive tasks. In response to this dilemma, we propose TongGu (meaning "understanding the ancient and the modern"), the first CCU-specific LLM, underpinned by three core contributions. First, we construct a two-stage instruction-tuning dataset ACCN-INS derived from rich classical Chinese corpora, aiming to unlock the full CCU potential of LLMs. Second, we propose Redundancy-Aware Tuning (RAT) to prevent catastrophic forgetting, enabling TongGu to acquire new capabilities while preserving its foundational knowledge. Third, we present a CCU Retrieval-Augmented Generation (CCU-RAG) technique to reduce hallucinations based on knowledge-grounding. Extensive experiments across 24 diverse CCU tasks validate TongGu's superior ability, underscoring the effectiveness of RAT and CCU-RAG. The model and dataset are available at https://github.com/SCUT-DLVCLab/TongGu-LLM. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
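As a loose illustration of the knowledge-grounding idea behind a CCU-style retrieval-augmented setup, the sketch below picks reference passages with a toy overlap score and folds them into a prompt. It is not TongGu's CCU-RAG implementation; the corpus, scoring, and prompt wording are placeholders.

```python
# Toy retrieval-augmented prompt; not TongGu's CCU-RAG, purely illustrative.
def retrieve(query: str, corpus: list[str], k: int = 2) -> list[str]:
    def overlap(doc: str) -> int:
        return len(set(query.lower()) & set(doc.lower()))  # crude lexical similarity
    return sorted(corpus, key=overlap, reverse=True)[:k]

def grounded_prompt(query: str, corpus: list[str]) -> str:
    context = "\n".join(retrieve(query, corpus))
    return (
        "Answer the question about Classical Chinese using the reference "
        f"passages below.\n\nReferences:\n{context}\n\nQuestion: {query}"
    )

corpus = [
    "passage about Tang dynasty poetry conventions",
    "passage about bronze inscriptions and early script",
    "passage about civil service examinations",
]
print(grounded_prompt("How were Tang poems structured?", corpus))
```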
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.01085">arXiv:2407.01085</a> <span> [<a href="https://arxiv.org/pdf/2407.01085">pdf</a>, <a href="https://arxiv.org/format/2407.01085">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Explaining Length Bias in LLM-Based Preference Evaluations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hu%2C+Z">Zhengyu Hu</a>, <a href="/search/cs?searchtype=author&query=Song%2C+L">Linxin Song</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jieyu Zhang</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+Z">Zheyuan Xiao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tianfu Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zhengyu Chen</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+N+J">Nicholas Jing Yuan</a>, <a href="/search/cs?searchtype=author&query=Lian%2C+J">Jianxun Lian</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+K">Kaize Ding</a>, <a href="/search/cs?searchtype=author&query=Xiong%2C+H">Hui Xiong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.01085v3-abstract-short" style="display: inline;"> The use of large language models (LLMs) as judges, particularly in preference comparisons, has become widespread, but this reveals a notable bias towards longer responses, undermining the reliability of such evaluations. To better understand such bias, we propose to decompose the preference evaluation metric, specifically the win rate, into two key components: desirability and information mass, wh… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.01085v3-abstract-full').style.display = 'inline'; document.getElementById('2407.01085v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.01085v3-abstract-full" style="display: none;"> The use of large language models (LLMs) as judges, particularly in preference comparisons, has become widespread, but this reveals a notable bias towards longer responses, undermining the reliability of such evaluations. To better understand such bias, we propose to decompose the preference evaluation metric, specifically the win rate, into two key components: desirability and information mass, where the former is length-independent and related to trustworthiness such as correctness, toxicity, and consistency, and the latter is length-dependent and represents the amount of information in the response. We empirically demonstrated the decomposition through controlled experiments and found that response length impacts evaluations by influencing information mass. To derive a reliable evaluation metric that assesses content quality without being confounded by response length, we propose AdapAlpaca, a simple yet effective adjustment to win rate measurement. Specifically, AdapAlpaca ensures a fair comparison of response quality by aligning the lengths of reference and test model responses under equivalent length intervals. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.01085v3-abstract-full').style.display = 'none'; document.getElementById('2407.01085v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.18301">arXiv:2406.18301</a> <span> [<a href="https://arxiv.org/pdf/2406.18301">pdf</a>, <a href="https://arxiv.org/format/2406.18301">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> MSR-86K: An Evolving, Multilingual Corpus with 86,300 Hours of Transcribed Audio for Speech Recognition Research </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+S">Song Li</a>, <a href="/search/cs?searchtype=author&query=You%2C+Y">Yongbin You</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xuezhi Wang</a>, <a href="/search/cs?searchtype=author&query=Tian%2C+Z">Zhengkun Tian</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+K">Ke Ding</a>, <a href="/search/cs?searchtype=author&query=Wan%2C+G">Guanglu Wan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.18301v1-abstract-short" style="display: inline;"> Recently, multilingual artificial intelligence assistants, exemplified by ChatGPT, have gained immense popularity. As a crucial gateway to human-computer interaction, multilingual automatic speech recognition (ASR) has also garnered significant attention, as evidenced by systems like Whisper. However, the proprietary nature of the training data has impeded researchers' efforts to study multilingua… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.18301v1-abstract-full').style.display = 'inline'; document.getElementById('2406.18301v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.18301v1-abstract-full" style="display: none;"> Recently, multilingual artificial intelligence assistants, exemplified by ChatGPT, have gained immense popularity. As a crucial gateway to human-computer interaction, multilingual automatic speech recognition (ASR) has also garnered significant attention, as evidenced by systems like Whisper. However, the proprietary nature of the training data has impeded researchers' efforts to study multilingual ASR. This paper introduces MSR-86K, an evolving, large-scale multilingual corpus for speech recognition research. The corpus is derived from publicly accessible videos on YouTube, comprising 15 languages and a total of 86,300 hours of transcribed ASR data. 
We also introduce how to use the MSR-86K corpus and other open-source corpora to train a robust multilingual ASR model that is competitive with Whisper. MSR-86K will be publicly released on HuggingFace, and we believe that such a large corpus will pave new avenues for research in multilingual ASR. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.18301v1-abstract-full').style.display = 'none'; document.getElementById('2406.18301v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by InterSpeech 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.15720">arXiv:2406.15720</a> <span> [<a href="https://arxiv.org/pdf/2406.15720">pdf</a>, <a href="https://arxiv.org/format/2406.15720">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Scaling Laws for Fact Memorization of Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lu%2C+X">Xingyu Lu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiaonan Li</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+Q">Qinyuan Cheng</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+K">Kai Ding</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+X">Xuanjing Huang</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+X">Xipeng Qiu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.15720v1-abstract-short" style="display: inline;"> Fact knowledge memorization is crucial for Large Language Models (LLM) to generate factual and reliable responses. However, the behaviors of LLM fact memorization remain under-explored. In this paper, we analyze the scaling laws for LLM's fact knowledge and LLMs' behaviors of memorizing different types of facts. We find that LLMs' fact knowledge capacity has a linear and negative exponential law r… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.15720v1-abstract-full').style.display = 'inline'; document.getElementById('2406.15720v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.15720v1-abstract-full" style="display: none;"> Fact knowledge memorization is crucial for Large Language Models (LLM) to generate factual and reliable responses. However, the behaviors of LLM fact memorization remain under-explored. In this paper, we analyze the scaling laws for LLM's fact knowledge and LLMs' behaviors of memorizing different types of facts. We find that LLMs' fact knowledge capacity has a linear and negative exponential law relationship with model size and training epochs, respectively. 
Estimated by the built scaling law, memorizing the whole Wikidata's facts requires training an LLM with 1000B non-embed parameters for 100 epochs, suggesting that using LLMs to memorize all public facts is almost implausible for a general pre-training setting. Meanwhile, we find that LLMs can generalize on unseen fact knowledge and its scaling law is similar to general pre-training. Additionally, we analyze the compatibility and preference of LLMs' fact memorization. For compatibility, we find LLMs struggle with memorizing redundant facts in a unified way. Only when correlated facts have the same direction and structure, the LLM can compatibly memorize them. This shows the inefficiency of LLM memorization for redundant facts. For preference, the LLM pays more attention to memorizing more frequent and difficult facts, and the subsequent facts can overwrite prior facts' memorization, which significantly hinders low-frequency facts memorization. Our findings reveal the capacity and characteristics of LLMs' fact knowledge learning, which provide directions for LLMs' fact knowledge augmentation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.15720v1-abstract-full').style.display = 'none'; document.getElementById('2406.15720v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.15523">arXiv:2406.15523</a> <span> [<a href="https://arxiv.org/pdf/2406.15523">pdf</a>, <a href="https://arxiv.org/format/2406.15523">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Unifying Unsupervised Graph-Level Anomaly Detection and Out-of-Distribution Detection: A Benchmark </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yili Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yixin Liu</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+X">Xu Shen</a>, <a href="/search/cs?searchtype=author&query=Li%2C+C">Chenyu Li</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+K">Kaize Ding</a>, <a href="/search/cs?searchtype=author&query=Miao%2C+R">Rui Miao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Ying Wang</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+S">Shirui Pan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xin Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.15523v1-abstract-short" style="display: inline;"> To build safe and reliable graph machine learning systems, unsupervised graph-level anomaly detection (GLAD) and unsupervised graph-level out-of-distribution (OOD) detection (GLOD) have received significant attention in recent years. 
Though those two lines of research indeed share the same objective, they have been studied independently in the community due to distinct evaluation setups, creating… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.15523v1-abstract-full').style.display = 'inline'; document.getElementById('2406.15523v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.15523v1-abstract-full" style="display: none;"> To build safe and reliable graph machine learning systems, unsupervised graph-level anomaly detection (GLAD) and unsupervised graph-level out-of-distribution (OOD) detection (GLOD) have received significant attention in recent years. Though those two lines of research indeed share the same objective, they have been studied independently in the community due to distinct evaluation setups, creating a gap that hinders the application and evaluation of methods from one to the other. To bridge the gap, in this work, we present a Unified Benchmark for unsupervised Graph-level OOD and anomaly Detection (our method), a comprehensive evaluation framework that unifies GLAD and GLOD under the concept of generalized graph-level OOD detection. Our benchmark encompasses 35 datasets spanning four practical anomaly and OOD detection scenarios, facilitating the comparison of 16 representative GLAD/GLOD methods. We conduct multi-dimensional analyses to explore the effectiveness, generalizability, robustness, and efficiency of existing methods, shedding light on their strengths and limitations. Furthermore, we provide an open-source codebase (https://github.com/UB-GOLD/UB-GOLD) of our method to foster reproducible research and outline potential directions for future investigations based on our insights. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.15523v1-abstract-full').style.display = 'none'; document.getElementById('2406.15523v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. 
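Both GLAD and GLOD reduce to the same evaluation core: assign each graph a score and measure how well the scores separate in-distribution graphs from anomalous or OOD ones, typically with AUROC. The dependency-free sketch below computes that metric on invented scores; it is illustrative and not code from the UB-GOLD benchmark.

```python
# Invented labels/scores; a real run would use a trained GLAD/GLOD detector.
def auroc(labels, scores):
    """Probability that a random positive outscores a random negative (ties count 0.5)."""
    pos = [s for l, s in zip(labels, scores) if l == 1]
    neg = [s for l, s in zip(labels, scores) if l == 0]
    wins = sum((p > n) + 0.5 * (p == n) for p in pos for n in neg)
    return wins / (len(pos) * len(neg))

labels = [0, 0, 0, 0, 1, 1, 1]            # 0 = in-distribution, 1 = anomaly/OOD
scores = [0.10, 0.20, 0.15, 0.40, 0.80, 0.35, 0.90]
print(f"AUROC: {auroc(labels, scores):.3f}")
```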
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.12747">arXiv:2406.12747</a> <span> [<a href="https://arxiv.org/pdf/2406.12747">pdf</a>, <a href="https://arxiv.org/format/2406.12747">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> TSI-Bench: Benchmarking Time Series Imputation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Du%2C+W">Wenjie Du</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jun Wang</a>, <a href="/search/cs?searchtype=author&query=Qian%2C+L">Linglong Qian</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yiyuan Yang</a>, <a href="/search/cs?searchtype=author&query=Ibrahim%2C+Z">Zina Ibrahim</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+F">Fanxing Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zepu Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+H">Haoxin Liu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Z">Zhiyuan Zhao</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Y">Yingjie Zhou</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+W">Wenjia Wang</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+K">Kaize Ding</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+Y">Yuxuan Liang</a>, <a href="/search/cs?searchtype=author&query=Prakash%2C+B+A">B. Aditya Prakash</a>, <a href="/search/cs?searchtype=author&query=Wen%2C+Q">Qingsong Wen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.12747v2-abstract-short" style="display: inline;"> Effective imputation is a crucial preprocessing step for time series analysis. Despite the development of numerous deep learning algorithms for time series imputation, the community lacks standardized and comprehensive benchmark platforms to effectively evaluate imputation performance across different settings. Moreover, although many deep learning forecasting algorithms have demonstrated excellen… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.12747v2-abstract-full').style.display = 'inline'; document.getElementById('2406.12747v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.12747v2-abstract-full" style="display: none;"> Effective imputation is a crucial preprocessing step for time series analysis. Despite the development of numerous deep learning algorithms for time series imputation, the community lacks standardized and comprehensive benchmark platforms to effectively evaluate imputation performance across different settings. Moreover, although many deep learning forecasting algorithms have demonstrated excellent performance, whether their modelling achievements can be transferred to time series imputation tasks remains unexplored. To bridge these gaps, we develop TSI-Bench, the first (to our knowledge) comprehensive benchmark suite for time series imputation utilizing deep learning techniques. 
The TSI-Bench pipeline standardizes experimental settings to enable fair evaluation of imputation algorithms and identification of meaningful insights into the influence of domain-appropriate missing rates and patterns on model performance. Furthermore, TSI-Bench innovatively provides a systematic paradigm to tailor time series forecasting algorithms for imputation purposes. Our extensive study across 34,804 experiments, 28 algorithms, and 8 datasets with diverse missingness scenarios demonstrates TSI-Bench's effectiveness in diverse downstream tasks and potential to unlock future directions in time series imputation research and analysis. All source code and experiment logs are released at https://github.com/WenjieDu/AwesomeImputation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.12747v2-abstract-full').style.display = 'none'; document.getElementById('2406.12747v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.10952">arXiv:2406.10952</a> <span> [<a href="https://arxiv.org/pdf/2406.10952">pdf</a>, <a href="https://arxiv.org/format/2406.10952">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Avoiding Copyright Infringement via Large Language Model Unlearning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dou%2C+G">Guangyao Dou</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zheyuan Liu</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+Q">Qing Lyu</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+K">Kaize Ding</a>, <a href="/search/cs?searchtype=author&query=Wong%2C+E">Eric Wong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.10952v3-abstract-short" style="display: inline;"> Pre-trained Large Language Models (LLMs) have demonstrated remarkable capabilities but also pose risks by learning and generating copyrighted material, leading to significant legal and ethical concerns. In real-world scenarios, model owners need to continuously address copyright infringement as new requests for content removal emerge at different time points. This leads to the need for sequential… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.10952v3-abstract-full').style.display = 'inline'; document.getElementById('2406.10952v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.10952v3-abstract-full" style="display: none;"> Pre-trained Large Language Models (LLMs) have demonstrated remarkable capabilities but also pose risks by learning and generating copyrighted material, leading to significant legal and ethical concerns. 
In real-world scenarios, model owners need to continuously address copyright infringement as new requests for content removal emerge at different time points. This leads to the need for sequential unlearning, where copyrighted content is removed sequentially as new requests arise. Despite its practical relevance, sequential unlearning in the context of copyright infringement has not been rigorously explored in existing literature. To address this gap, we propose Stable Sequential Unlearning (SSU), a novel framework designed to unlearn copyrighted content from LLMs over multiple time steps. Our approach works by identifying and removing specific weight updates in the model's parameters that correspond to copyrighted content. We improve unlearning efficacy by introducing random labeling loss and ensuring the model retains its general-purpose knowledge by adjusting targeted parameters. Experimental results show that SSU achieves an effective trade-off between unlearning efficacy and general-purpose language abilities, outperforming existing baselines. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.10952v3-abstract-full').style.display = 'none'; document.getElementById('2406.10952v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.09098">arXiv:2406.09098</a> <span> [<a href="https://arxiv.org/pdf/2406.09098">pdf</a>, <a href="https://arxiv.org/format/2406.09098">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> SciKnowEval: Evaluating Multi-level Scientific Knowledge of Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Feng%2C+K">Kehua Feng</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+K">Keyan Ding</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+W">Weijie Wang</a>, <a href="/search/cs?searchtype=author&query=Zhuang%2C+X">Xiang Zhuang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zeyuan Wang</a>, <a href="/search/cs?searchtype=author&query=Qin%2C+M">Ming Qin</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Y">Yu Zhao</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+J">Jianhua Yao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Q">Qiang Zhang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+H">Huajun Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.09098v3-abstract-short" style="display: inline;"> Large language models (LLMs) have gained increasing prominence in scientific research, but there is a lack of comprehensive benchmarks to fully evaluate their proficiency in understanding and mastering scientific knowledge. 
arXiv:2406.09098 [pdf, other] cs.CL
SciKnowEval: Evaluating Multi-level Scientific Knowledge of Large Language Models
Authors: Kehua Feng, Keyan Ding, Weijie Wang, Xiang Zhuang, Zeyuan Wang, Ming Qin, Yu Zhao, Jianhua Yao, Qiang Zhang, Huajun Chen
Abstract: Large language models (LLMs) have gained increasing prominence in scientific research, but there is a lack of comprehensive benchmarks to fully evaluate their proficiency in understanding and mastering scientific knowledge. To address this need, we introduce the SciKnowEval benchmark, a novel framework that systematically evaluates LLMs across five progressive levels of scientific knowledge: studying extensively, inquiring earnestly, thinking profoundly, discerning clearly, and practicing assiduously. These levels aim to assess the breadth and depth of scientific knowledge in LLMs, including memory, comprehension, reasoning, discernment, and application. Specifically, we first construct a large-scale evaluation dataset encompassing 70K multi-level scientific problems and solutions in the domains of biology, chemistry, physics, and materials science. By leveraging this dataset, we benchmark 26 advanced open-source and proprietary LLMs using zero-shot and few-shot prompting strategies. The results reveal that despite the state-of-the-art performance of proprietary LLMs, there is still significant room for improvement, particularly in addressing scientific reasoning and applications. We anticipate that SciKnowEval will establish a standard for benchmarking LLMs in science research and promote the development of stronger scientific LLMs. The dataset and code are publicly available at https://scimind.ai/sciknoweval .
Submitted 7 October, 2024; v1 submitted 13 June, 2024; originally announced June 2024.
Comments: 47 pages, 3 figures

arXiv:2406.00115 [pdf, other] cs.PL
Towards LLM-Powered Verilog RTL Assistant: Self-Verification and Self-Correction
Authors: Hanxian Huang, Zhenghan Lin, Zixuan Wang, Xin Chen, Ke Ding, Jishen Zhao
Abstract: We explore the use of Large Language Models (LLMs) to generate high-quality Register-Transfer Level (RTL) code with minimal human interference. The traditional RTL design workflow requires human experts to manually write high-quality RTL code, which is time-consuming and error-prone. With the help of emerging LLMs, developers can describe their requirements to LLMs, which then generate corresponding code in Python, C, Java, and more. However, adopting LLMs to generate RTL designs in hardware description languages is not trivial, given the complex nature of hardware design and the requirement that the generated design meet timing and physical constraints. We propose VeriAssist, an LLM-powered programming assistant for the Verilog RTL design workflow. VeriAssist takes RTL design descriptions as input and generates high-quality RTL code with corresponding test benches. VeriAssist enables the LLM to self-correct and self-verify the generated code by adopting an automatic prompting system and integrating an RTL simulator into the code generation loop.
To generate an RTL design, VeriAssist first produces the initial RTL code and corresponding test benches, followed by a self-verification step that walks through the code with test cases to reason about the code's behavior at different time steps; finally, it self-corrects the code by reading the compilation and simulation results and generating final RTL code that fixes the errors found in compilation and simulation. This design fully leverages the LLMs' capabilities for multi-turn interaction and chain-of-thought reasoning to improve the quality of the generated code. We evaluate VeriAssist with various benchmark suites and find that it significantly improves both syntax and functional correctness over existing LLM implementations, thus minimizing human intervention and making RTL design more accessible to novice designers.
Submitted 31 May, 2024; originally announced June 2024.
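As a rough illustration of the generate-verify-correct loop the abstract describes (not VeriAssist itself), the sketch below wires a hypothetical llm_generate callable to the open-source Icarus Verilog tools (iverilog/vvp) as stand-in compile and simulation steps; the convention that a failing testbench prints "FAIL" is an assumption.

import subprocess
import tempfile

def generate_verify_correct(design_spec, llm_generate, max_rounds=3):
    """Generic generate -> compile -> simulate -> self-correct loop.

    llm_generate(prompt) -> str is a hypothetical callable wrapping an LLM.
    """
    prompt = f"Write synthesizable Verilog and a testbench for:\n{design_spec}"
    code = llm_generate(prompt)
    for _ in range(max_rounds):
        with tempfile.NamedTemporaryFile("w", suffix=".v", delete=False) as f:
            f.write(code)
            src = f.name
        # Compile and collect diagnostics instead of failing hard.
        build = subprocess.run(["iverilog", "-o", src + ".out", src],
                               capture_output=True, text=True)
        sim_out = ""
        if build.returncode == 0:
            sim_out = subprocess.run(["vvp", src + ".out"],
                                     capture_output=True, text=True).stdout
        if build.returncode == 0 and "FAIL" not in sim_out:
            return code  # compiles and the testbench reports no failures
        # Feed the tool output back to the LLM and ask for a corrected version.
        prompt = (f"The following Verilog failed.\nCompiler output:\n{build.stderr}\n"
                  f"Simulation output:\n{sim_out}\nFix the code:\n{code}")
        code = llm_generate(prompt)
    return code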
arXiv:2405.18790 [pdf, other] cs.CV cs.MM eess.IV
Opinion-Unaware Blind Image Quality Assessment using Multi-Scale Deep Feature Statistics
Authors: Zhangkai Ni, Yue Liu, Keyan Ding, Wenhan Yang, Hanli Wang, Shiqi Wang
Abstract: Deep learning-based methods have significantly influenced the blind image quality assessment (BIQA) field; however, these methods often require training on large amounts of human rating data. In contrast, traditional knowledge-based methods are cost-effective to train but face challenges in effectively extracting features aligned with human visual perception. To bridge these gaps, we propose integrating deep features from pre-trained visual models with a statistical analysis model into a Multi-scale Deep Feature Statistics (MDFS) model to achieve opinion-unaware BIQA (OU-BIQA), thereby eliminating the reliance on human rating data and significantly improving training efficiency. Specifically, we extract patch-wise multi-scale features from pre-trained vision models, which are subsequently fitted into a multivariate Gaussian (MVG) model. The final quality score is determined by quantifying the distance between the MVG model derived from the test image and the benchmark MVG model derived from the high-quality image set. A comprehensive series of experiments conducted on various datasets shows that our proposed model exhibits superior consistency with human visual perception compared to state-of-the-art BIQA models. Furthermore, it shows improved generalizability across diverse target-specific BIQA tasks. Our code is available at: https://github.com/eezkni/MDFS
Submitted 29 May, 2024; originally announced May 2024.
Comments: Accepted to IEEE Transactions on Multimedia 2024
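As a generic illustration of scoring by comparing feature statistics under a multivariate Gaussian model (not the released MDFS code), the sketch below fits a Gaussian to feature vectors and uses the Fréchet distance between Gaussians as one possible distance; the regularization constant and the choice of distance are assumptions.

import numpy as np
from scipy.linalg import sqrtm

def fit_mvg(features):
    """Fit a multivariate Gaussian to row-wise feature vectors of shape (N, D)."""
    mu = features.mean(axis=0)
    cov = np.cov(features, rowvar=False) + 1e-6 * np.eye(features.shape[1])
    return mu, cov

def gaussian_distance(mu1, cov1, mu2, cov2):
    """Fréchet (2-Wasserstein) distance between two Gaussians, used here as a
    stand-in for the statistical distance the abstract refers to."""
    covmean = sqrtm(cov1 @ cov2).real
    return float(np.sum((mu1 - mu2) ** 2) + np.trace(cov1 + cov2 - 2.0 * covmean))

# Usage: benchmark statistics come from features of a pristine image corpus,
# test statistics from patch-wise features of the image being scored;
# a lower distance corresponds to a higher predicted quality.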
arXiv:2405.15234 [pdf, other] cs.CV cs.CR
Defensive Unlearning with Adversarial Training for Robust Concept Erasure in Diffusion Models
Authors: Yimeng Zhang, Xin Chen, Jinghan Jia, Yihua Zhang, Chongyu Fan, Jiancheng Liu, Mingyi Hong, Ke Ding, Sijia Liu
Abstract: Diffusion models (DMs) have achieved remarkable success in text-to-image generation, but they also pose safety risks, such as the potential generation of harmful content and copyright violations. The techniques of machine unlearning, also known as concept erasing, have been developed to address these risks. However, these techniques remain vulnerable to adversarial prompt attacks, which can prompt DMs post-unlearning to regenerate undesired images containing concepts (such as nudity) meant to be erased. This work aims to enhance the robustness of concept erasing by integrating the principle of adversarial training (AT) into machine unlearning, resulting in the robust unlearning framework referred to as AdvUnlearn. However, achieving this effectively and efficiently is highly nontrivial. First, we find that a straightforward implementation of AT compromises DMs' image generation quality post-unlearning.
To address this, we develop a utility-retaining regularization on an additional retain set, optimizing the trade-off between concept-erasure robustness and model utility in AdvUnlearn. Moreover, we identify the text encoder as a more suitable module for robustification than the UNet, ensuring unlearning effectiveness, and the acquired text encoder can serve as a plug-and-play robust unlearner for various DM types. Empirically, we perform extensive experiments to demonstrate the robustness advantage of AdvUnlearn across various DM unlearning scenarios, including the erasure of nudity, objects, and style concepts. In addition to robustness, AdvUnlearn also achieves a balanced trade-off with model utility. To our knowledge, this is the first work to systematically explore robust DM unlearning through AT, setting it apart from existing methods that overlook robustness in concept erasing. Codes are available at: https://github.com/OPTML-Group/AdvUnlearn
Submitted 9 October, 2024; v1 submitted 24 May, 2024; originally announced May 2024.
Comments: Accepted by NeurIPS'24. Codes are available at https://github.com/OPTML-Group/AdvUnlearn

arXiv:2405.14743 [pdf, other] cs.LG cs.AI
Iterative Causal Segmentation: Filling the Gap between Market Segmentation and Marketing Strategy
Authors: Kaihua Ding, Jingsong Cui, Mohammad Soltani, Jing Jin
Abstract: The field of causal Machine Learning (ML) has made significant strides in recent years. Notable breakthroughs include methods such as meta learners (arXiv:1706.03461v6) and heterogeneous doubly robust estimators (arXiv:2004.14497) introduced in the last five years.
Despite these advancements, the field still faces challenges, particularly in managing tightly coupled systems where both the causal treatment variable and a confounding covariate must serve as key decision-making indicators. This scenario is common in applications of causal ML for marketing, such as marketing segmentation and incremental marketing uplift. In this work, we present our formally proven algorithm, iterative causal segmentation, to address this issue.
Submitted 23 May, 2024; originally announced May 2024.

arXiv:2405.12244 [pdf] physics.soc-ph cs.LG
Real-Time Go-Around Prediction: A case study of JFK airport
Authors: Ke Liu, Kaijing Ding, Lu Dai, Mark Hansen, Kennis Chan, John Schade
Abstract: In this paper, we employ the long short-term memory (LSTM) model to predict the real-time go-around probability as an arrival flight approaches JFK airport and is within 10 nm of the landing runway threshold. We further develop methods to examine the causes of go-around occurrences both from a global view and from an individual flight perspective.
According to our results, in-trail spacing and simultaneous runway operation appear to be the top factors contributing to overall go-around occurrences. We then integrate these pre-trained models and analyses with real-time data streaming, and finally develop a demo web-based user interface that combines the components designed previously into a real-time tool that can eventually be used by flight crews and other line personnel to identify situations in which there is a high risk of a go-around.
Submitted 18 May, 2024; originally announced May 2024.
Comments: https://www.icrat.org/
Journal ref: International Conference on Research in Air Transportation (ICRAT2024)
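A minimal PyTorch sketch of the kind of LSTM classifier the abstract describes, mapping a sequence of per-time-step approach features to a go-around probability; the feature count, sequence length, and network sizes below are illustrative assumptions, not the paper's configuration.

import torch
import torch.nn as nn

class GoAroundLSTM(nn.Module):
    """Sequence classifier: per-time-step approach features -> go-around probability."""
    def __init__(self, n_features=8, hidden=64, layers=2):
        super().__init__()
        self.lstm = nn.LSTM(n_features, hidden, num_layers=layers, batch_first=True)
        self.head = nn.Linear(hidden, 1)

    def forward(self, x):                        # x: (batch, time, n_features)
        out, _ = self.lstm(x)
        return torch.sigmoid(self.head(out[:, -1, :])).squeeze(-1)

# Illustrative training step on a batch of synthetic approach trajectories.
model = GoAroundLSTM()
opt = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.BCELoss()
x = torch.randn(32, 40, 8)                       # 32 flights, 40 time steps, 8 features
y = torch.randint(0, 2, (32,)).float()           # 1 = go-around, 0 = landed
opt.zero_grad()
loss_fn(model(x), y).backward()
opt.step()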
arXiv:2405.08293 [pdf, other] cs.LG
Airport Delay Prediction with Temporal Fusion Transformers
Authors: Ke Liu, Kaijing Ding, Xi Cheng, Guanhao Xu, Xin Hu, Tong Liu, Siyuan Feng, Binze Cai, Jianan Chen, Hui Lin, Jilin Song, Chen Zhu
Abstract: Since flight delays hurt passengers, airlines, and airports, delay prediction is crucial for the decision-making of all stakeholders in the aviation industry and has thus been attempted by various previous studies. However, previous delay predictions are often categorical and made at a highly aggregated level. To improve on this, this study applies the novel Temporal Fusion Transformer model to predict numerical airport arrival delays at the quarter-hour level for the top 30 U.S. airports. Inputs to our model include airport demand and capacity forecasts, historical airport operational efficiency information, airport wind and visibility conditions, as well as enroute weather and traffic conditions. The results show that our model achieves satisfactory performance, as measured by small prediction errors on the test set. In addition, an interpretability analysis of the model outputs identifies the important input factors for delay prediction.
Submitted 6 October, 2024; v1 submitted 13 May, 2024; originally announced May 2024.
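A small pandas sketch of how arrival records might be aggregated into the quarter-hour numerical delay target described above, with a few simple lagged inputs; the file name, column names, and choice of lags are hypothetical, and the richer demand, capacity, and weather inputs used in the paper are omitted.

import pandas as pd

# ops: hypothetical per-flight arrival records with columns
#      ["arrival_time", "arrival_delay_min"] for one airport.
ops = pd.read_csv("jfk_arrivals.csv", parse_dates=["arrival_time"])

# Aggregate to a quarter-hour target: mean arrival delay per 15-minute bin.
q = (ops.set_index("arrival_time")["arrival_delay_min"]
        .resample("15min").mean()
        .to_frame("avg_delay"))

# Simple calendar features and observed-past lags as illustrative inputs.
q["hour"] = q.index.hour
q["dow"] = q.index.dayofweek
for lag in (1, 2, 4, 96):                        # 15 min, 30 min, 1 h, 1 day back
    q[f"delay_lag_{lag}"] = q["avg_delay"].shift(lag)
q = q.dropna()
# q can now be fed to any sequence or tabular forecaster, e.g. a TFT implementation.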
arXiv:2405.04757 [pdf, other] eess.SY cs.GT
Communication-efficient and Differentially-private Distributed Nash Equilibrium Seeking with Linear Convergence
Authors: Xiaomeng Chen, Wei Huo, Kemi Ding, Subhrakanti Dey, Ling Shi
Abstract: The distributed computation of a Nash equilibrium (NE) for non-cooperative games has recently gained increased attention. Due to the nature of distributed systems, privacy and communication efficiency are two critical concerns. Traditional approaches often address these critical concerns in isolation. This work introduces a unified framework, named CDP-NES, designed to improve communication efficiency in privacy-preserving NE seeking for distributed non-cooperative games over directed graphs. Leveraging both general compression operators and a noise-adding mechanism, CDP-NES perturbs local states with Laplacian noise and applies difference compression prior to their exchange among neighbors. We prove that CDP-NES not only achieves linear convergence to a neighborhood of the NE in games with restricted monotone mappings but also guarantees $\varepsilon$-differential privacy, addressing privacy and communication efficiency simultaneously. Finally, simulations are provided to illustrate the effectiveness of the proposed method.
Submitted 7 May, 2024; originally announced May 2024.
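A generic sketch of the "perturb with Laplacian noise, then transmit a compressed difference" message construction described above (not the CDP-NES algorithm itself); the top-k compressor and the reference-state bookkeeping are illustrative assumptions.

import numpy as np

def top_k(v, k):
    """Simple compression operator: keep only the k largest-magnitude entries."""
    out = np.zeros_like(v)
    idx = np.argsort(np.abs(v))[-k:]
    out[idx] = v[idx]
    return out

def private_compressed_message(x_local, x_ref, noise_scale, k, rng=None):
    """Build the message a player sends to its neighbors.

    x_local: current local state; x_ref: the reference state neighbors already
    hold (reconstructed from earlier messages). The state is perturbed with
    Laplace noise for privacy, and only the compressed difference from the
    reference is transmitted to save communication.
    """
    if rng is None:
        rng = np.random.default_rng()
    x_noisy = x_local + rng.laplace(scale=noise_scale, size=x_local.shape)
    delta = top_k(x_noisy - x_ref, k)            # compressed innovation
    x_ref_new = x_ref + delta                    # receiver-side reconstruction
    return delta, x_ref_new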
Submitted 5 May, 2024; originally announced May 2024.

arXiv:2405.03106 [pdf, other] eess.SY cs.GT
Compression-based Privacy Preservation for Distributed Nash Equilibrium Seeking in Aggregative Games
Authors: Wei Huo, Xiaomeng Chen, Kemi Ding, Subhrakanti Dey, Ling Shi
Abstract: This paper explores distributed aggregative games in multi-agent systems. Current methods for finding a distributed Nash equilibrium require players to send original messages to their neighbors, leading to communication burdens and privacy issues. To jointly address these issues, we propose an algorithm that uses stochastic compression to save communication resources and conceal information through the random errors induced by compression. Our theoretical analysis shows that the algorithm guarantees convergence accuracy, even with aggressive compression errors used to protect privacy. We prove that the algorithm achieves differential privacy through a stochastic quantization scheme. Simulation results for energy consumption games support the effectiveness of our approach.
Submitted 5 May, 2024; originally announced May 2024.
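For the stochastic quantization idea mentioned above, the following sketch shows a standard unbiased stochastic quantizer on a uniform grid; the grid size is an arbitrary choice and the operator is generic, not the paper's scheme.

import numpy as np

def stochastic_quantize(x, levels=16, rng=None):
    """Unbiased stochastic quantizer: each entry is rounded up or down to the
    nearest of `levels` uniform grid points with probabilities chosen so that
    E[Q(x)] = x. The rounding randomness is also what conceals the raw value.
    """
    if rng is None:
        rng = np.random.default_rng()
    lo, hi = x.min(), x.max()
    if hi == lo:                                 # constant vector: nothing to quantize
        return x.copy()
    step = (hi - lo) / (levels - 1)
    scaled = (x - lo) / step                     # position on the grid
    floor = np.floor(scaled)
    prob_up = scaled - floor                     # probability of rounding up
    rounded = floor + (rng.random(x.shape) < prob_up)
    return lo + rounded * step

# Unbiasedness check, e.g.:
# np.mean([stochastic_quantize(np.array([0.3, 1.0]))[0] for _ in range(10000)]) ~ 0.3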
arXiv:2404.17642 [pdf, other] cs.CL cs.AI
Empowering Large Language Models for Textual Data Augmentation
Authors: Yichuan Li, Kaize Ding, Jianling Wang, Kyumin Lee
Abstract: With their capability to understand and execute natural language instructions, large language models (LLMs) can potentially act as a powerful tool for textual data augmentation. However, the quality of augmented data depends heavily on the augmentation instructions provided, and the effectiveness can fluctuate across different downstream tasks. While manually crafting and selecting instructions can offer some improvement, this approach faces scalability and consistency issues in practice due to the diversity of downstream tasks. In this work, we address these limitations by proposing a new solution that can automatically generate a large pool of augmentation instructions and select the most suitable task-informed instructions, thereby empowering LLMs to create high-quality augmented data for different downstream tasks. Empirically, the proposed approach consistently generates augmented data of better quality than non-LLM and LLM-based data augmentation methods, leading to the best performance on 26 few-shot learning tasks sourced from a wide range of application domains.
Submitted 26 April, 2024; originally announced April 2024.

arXiv:2404.09438 [pdf, other] math.OC cs.LG stat.ML
Developing Lagrangian-based Methods for Nonsmooth Nonconvex Optimization
Authors: Nachuan Xiao, Kuangyu Ding, Xiaoyin Hu, Kim-Chuan Toh
Abstract: In this paper, we consider the minimization of a nonsmooth nonconvex objective function $f(x)$ over a closed convex subset $\mathcal{X}$ of $\mathbb{R}^n$, with additional nonsmooth nonconvex constraints $c(x) = 0$. We develop a unified framework for Lagrangian-based methods that take a single-step update of the primal variables via some subgradient method in each iteration. These subgradient methods are ``embedded'' into our framework, in the sense that they are incorporated as black-box updates to the primal variables. We prove that our proposed framework inherits the global convergence guarantees of these embedded subgradient methods under mild conditions. In addition, we show that our framework can be extended to solve constrained optimization problems with expectation constraints.
Based on the proposed framework, we show that a wide range of existing stochastic subgradient methods, including the proximal SGD, proximal momentum SGD, and proximal ADAM, can be embedded into Lagrangian-based methods. Preliminary numerical experiments on deep learning tasks illustrate that our proposed framework yields efficient variants of Lagrangian-based methods with convergence guarantees for nonconvex nonsmooth constrained optimization problems.
Submitted 14 April, 2024; originally announced April 2024.
Comments: 30 pages, 4 figures
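A toy numpy sketch in the spirit of the framework described above: an augmented-Lagrangian scheme whose primal update is a single (sub)gradient step followed by dual ascent. The specific problem, step sizes, and penalty parameter are assumptions for illustration, not the paper's method.

import numpy as np

def lagrangian_subgradient_method(f_grad, c, c_jac, x0, steps=500,
                                  eta=1e-2, rho=10.0, dual_lr=1e-2):
    """Augmented-Lagrangian scheme with a single-step primal update.

    minimize f(x)  subject to  c(x) = 0
    L_rho(x, y) = f(x) + y^T c(x) + (rho/2) ||c(x)||^2
    Each iteration: one (sub)gradient step in x, then a dual ascent step in y.
    """
    x = np.asarray(x0, dtype=float)
    y = np.zeros(len(c(x)))
    for _ in range(steps):
        J = c_jac(x)                             # Jacobian of c at x
        grad_x = f_grad(x) + J.T @ (y + rho * c(x))
        x = x - eta * grad_x                     # single-step primal update
        y = y + dual_lr * c(x)                   # dual ascent
    return x, y

# Toy usage: minimize ||x||^2 subject to x_0 + x_1 - 1 = 0 (true solution [0.5, 0.5]).
x_opt, _ = lagrangian_subgradient_method(
    f_grad=lambda x: 2 * x,
    c=lambda x: np.array([x[0] + x[1] - 1.0]),
    c_jac=lambda x: np.array([[1.0, 1.0]]),
    x0=np.zeros(2))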
arXiv:2404.08008 [pdf, other] cs.LG cs.CL cs.HC
Sample-Efficient Human Evaluation of Large Language Models via Maximum Discrepancy Competition
Authors: Kehua Feng, Keyan Ding, Kede Ma, Zhihua Wang, Qiang Zhang, Huajun Chen
Abstract: The past years have witnessed a proliferation of large language models (LLMs). Yet automated and unbiased evaluation of LLMs remains challenging, owing to the inaccuracy of standard metrics in reflecting human preferences and the inefficiency of sampling informative and diverse test examples. While human evaluation remains the gold standard, it is expensive and time-consuming, especially when dealing with a large number of testing samples. To address this problem, we propose a sample-efficient human evaluation method based on MAximum Discrepancy (MAD) competition. MAD automatically selects a small set of informative and diverse instructions, each adapted to two LLMs, whose responses are subject to three-alternative forced choice by human subjects. The pairwise comparison results are then aggregated into a global ranking using the Elo rating system. We select eight representative LLMs and compare them in terms of four skills: knowledge understanding, mathematical reasoning, writing, and coding. Experimental results show that the proposed method achieves a reliable and sensible ranking of LLMs' capabilities, identifies their relative strengths and weaknesses, and offers valuable insights for further LLM advancement.
Submitted 9 April, 2024; originally announced April 2024.
Comments: 32 pages, 6 figures
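To illustrate the Elo aggregation step mentioned in the abstract, the sketch below turns pairwise human preferences into a global ranking; the K-factor, base, and initial rating are conventional defaults rather than values from the paper, and the model names are hypothetical.

from collections import defaultdict

def elo_ranking(comparisons, k=32, base=400.0, init=1000.0):
    """Aggregate pairwise outcomes into Elo ratings.

    comparisons: iterable of (model_a, model_b, score) with score = 1.0 if the
    human judge preferred model_a, 0.0 if model_b, and 0.5 for a tie.
    """
    rating = defaultdict(lambda: init)
    for a, b, score in comparisons:
        expected_a = 1.0 / (1.0 + 10 ** ((rating[b] - rating[a]) / base))
        rating[a] += k * (score - expected_a)
        rating[b] += k * ((1.0 - score) - (1.0 - expected_a))
    return sorted(rating.items(), key=lambda kv: kv[1], reverse=True)

# Example: three judgments over three hypothetical models.
print(elo_ranking([("model-a", "model-b", 1.0),
                   ("model-b", "model-c", 0.5),
                   ("model-a", "model-c", 1.0)]))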