Search | arXiv e-print repository

<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 3,000 results for author: <span class="mathjax">Liu, H</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Liu%2C+H">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Liu, H"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Liu%2C+H&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Liu, H"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Liu%2C+H&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Liu%2C+H&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Liu%2C+H&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Liu%2C+H&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Liu%2C+H&amp;start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Liu%2C+H&amp;start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">&hellip;</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.14353">arXiv:2411.14353</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.14353">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Enhancing Medical Image Segmentation with Deep Learning and Diffusion Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Liu%2C+H">Houze Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+T">Tong Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Xiang%2C+Y">Yanlin Xiang</a>, <a href="/search/cs?searchtype=author&amp;query=Shen%2C+A">Aoran Shen</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+J">Jiacheng Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Du%2C+J">Junliang Du</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.14353v1-abstract-short" style="display: inline;"> Medical image segmentation is crucial for accurate clinical diagnoses, yet it faces challenges such as low contrast between lesions and normal tissues, unclear boundaries, and high variability across patients. 
Deep learning has improved segmentation accuracy and efficiency, but it still relies heavily on expert annotations and struggles with the complexities of medical images. The small size of me&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14353v1-abstract-full').style.display = 'inline'; document.getElementById('2411.14353v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.14353v1-abstract-full" style="display: none;"> Medical image segmentation is crucial for accurate clinical diagnoses, yet it faces challenges such as low contrast between lesions and normal tissues, unclear boundaries, and high variability across patients. Deep learning has improved segmentation accuracy and efficiency, but it still relies heavily on expert annotations and struggles with the complexities of medical images. The small size of medical image datasets and the high cost of data acquisition further limit the performance of segmentation networks. Diffusion models, with their iterative denoising process, offer a promising alternative for better detail capture in segmentation. However, they face difficulties in accurately segmenting small targets and maintaining the precision of boundary details. This article discusses the importance of medical image segmentation, the limitations of current deep learning approaches, and the potential of diffusion models to address these challenges. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14353v1-abstract-full').style.display = 'none'; document.getElementById('2411.14353v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.14201">arXiv:2411.14201</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.14201">pdf</a>, <a href="https://arxiv.org/format/2411.14201">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Regional Attention for Shadow Removal </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Liu%2C+H">Hengxing Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+M">Mingjia Li</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+X">Xiaojie Guo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.14201v1-abstract-short" style="display: inline;"> Shadow, as a natural consequence of light interacting with objects, plays a crucial role in shaping the aesthetics of an image, which however also impairs the content visibility and overall visual quality. Recent shadow removal approaches employ the mechanism of attention, due to its effectiveness, as a key component. 
However, they often suffer from two issues including large model size and high c&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14201v1-abstract-full').style.display = 'inline'; document.getElementById('2411.14201v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.14201v1-abstract-full" style="display: none;"> Shadow, as a natural consequence of light interacting with objects, plays a crucial role in shaping the aesthetics of an image, which however also impairs the content visibility and overall visual quality. Recent shadow removal approaches employ the mechanism of attention, due to its effectiveness, as a key component. However, they often suffer from two issues including large model size and high computational complexity for practical use. To address these shortcomings, this work devises a lightweight yet accurate shadow removal framework. First, we analyze the characteristics of the shadow removal task to seek the key information required for reconstructing shadow regions and designing a novel regional attention mechanism to effectively capture such information. Then, we customize a Regional Attention Shadow Removal Model (RASM, in short), which leverages non-shadow areas to assist in restoring shadow ones. Unlike existing attention-based models, our regional attention strategy allows each shadow region to interact more rationally with its surrounding non-shadow areas, for seeking the regional contextual correlation between shadow and non-shadow areas. Extensive experiments are conducted to demonstrate that our proposed method delivers superior performance over other state-of-the-art models in terms of accuracy and efficiency, making it appealing for practical applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14201v1-abstract-full').style.display = 'none'; document.getElementById('2411.14201v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
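As a rough illustration of the regional attention idea described in the abstract above, the sketch below restricts attention so that shadow-pixel queries gather information only from non-shadow keys and values. This is a minimal, generic sketch rather than the authors' RASM implementation; the random projections, the binary shadow mask, and the flattened feature layout are all illustrative assumptions.

```python
import numpy as np

def masked_regional_attention(feats, shadow_mask, seed=0):
    """Toy regional attention: queries from shadow pixels attend only to
    keys/values from non-shadow pixels (illustrative, not RASM).
    feats: (N, C) flattened pixel features; shadow_mask: (N,) bool."""
    rng = np.random.default_rng(seed)
    n, c = feats.shape
    Wq, Wk, Wv = (rng.standard_normal((c, c)) / np.sqrt(c) for _ in range(3))
    Q, K, V = feats @ Wq, feats @ Wk, feats @ Wv

    scores = Q @ K.T / np.sqrt(c)          # (N, N) query-key similarities
    scores[:, shadow_mask] = -np.inf       # mask out keys that lie in shadow
    weights = np.exp(scores - scores.max(axis=1, keepdims=True))
    weights /= weights.sum(axis=1, keepdims=True)

    out = feats.copy()
    out[shadow_mask] = weights[shadow_mask] @ V   # only shadow pixels are rewritten
    return out

# toy usage: 100 "pixels" with 32-dim features, the first 30 in shadow
feats = np.random.default_rng(1).standard_normal((100, 32))
mask = np.zeros(100, dtype=bool)
mask[:30] = True
restored = masked_regional_attention(feats, mask)
```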
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.13789">arXiv:2411.13789</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.13789">pdf</a>, <a href="https://arxiv.org/format/2411.13789">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> LEADRE: Multi-Faceted Knowledge Enhanced LLM Empowered Display Advertisement Recommender System </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+F">Fengxin Li</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Y">Yi Li</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Y">Yue Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+C">Chao Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yuan Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Deng%2C+X">Xiaoxiang Deng</a>, <a href="/search/cs?searchtype=author&amp;query=Xue%2C+W">Wei Xue</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+D">Dapeng Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Xiao%2C+L">Lei Xiao</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+H">Haijie Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+J">Jie Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+H">Hongyan Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Qin%2C+B">Biao Qin</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+J">Jun He</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.13789v1-abstract-short" style="display: inline;"> Display advertising provides significant value to advertisers, publishers, and users. Traditional display advertising systems utilize a multi-stage architecture consisting of retrieval, coarse ranking, and final ranking. However, conventional retrieval methods rely on ID-based learning to rank mechanisms and fail to adequately utilize the content information of ads, which hampers their ability to&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13789v1-abstract-full').style.display = 'inline'; document.getElementById('2411.13789v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.13789v1-abstract-full" style="display: none;"> Display advertising provides significant value to advertisers, publishers, and users. Traditional display advertising systems utilize a multi-stage architecture consisting of retrieval, coarse ranking, and final ranking. However, conventional retrieval methods rely on ID-based learning to rank mechanisms and fail to adequately utilize the content information of ads, which hampers their ability to provide diverse recommendation lists. To address this limitation, we propose leveraging the extensive world knowledge of LLMs. However, three key challenges arise when attempting to maximize the effectiveness of LLMs: &#34;How to capture user interests&#34;, &#34;How to bridge the knowledge gap between LLMs and advertising system&#34;, and &#34;How to efficiently deploy LLMs&#34;. 
To overcome these challenges, we introduce a novel LLM-based framework called LLM Empowered Display ADvertisement REcommender system (LEADRE). LEADRE consists of three core modules: (1) The Intent-Aware Prompt Engineering introduces multi-faceted knowledge and designs intent-aware &lt;Prompt, Response&gt; pairs that fine-tune LLMs to generate ads tailored to users&#39; personal interests. (2) The Advertising-Specific Knowledge Alignment incorporates auxiliary fine-tuning tasks and Direct Preference Optimization (DPO) to align LLMs with ad semantic and business value. (3) The Efficient System Deployment deploys LEADRE in an online environment by integrating both latency-tolerant and latency-sensitive service. Extensive offline experiments demonstrate the effectiveness of LEADRE and validate the contributions of individual modules. Online A/B test shows that LEADRE leads to a 1.57% and 1.17% GMV lift for serviced users on WeChat Channels and Moments separately. LEADRE has been deployed on both platforms, serving tens of billions of requests each day. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13789v1-abstract-full').style.display = 'none'; document.getElementById('2411.13789v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.13050">arXiv:2411.13050</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.13050">pdf</a>, <a href="https://arxiv.org/format/2411.13050">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> </div> </div> <p class="title is-5 mathjax"> Topkima-Former: Low-energy, Low-Latency Inference for Transformers using top-k In-memory ADC </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Dong%2C+S">Shuai Dong</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Junyi Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Peng%2C+X">Xiaoqi Peng</a>, <a href="/search/cs?searchtype=author&amp;query=Shang%2C+H">Hongyang Shang</a>, <a href="/search/cs?searchtype=author&amp;query=Ke%2C+Y">Ye Ke</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+X">Xiaofeng Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+H">Hongjie Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Basu%2C+A">Arindam Basu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.13050v1-abstract-short" style="display: inline;"> Transformer model has gained prominence as a popular deep neural network architecture for neural language processing (NLP) and computer vision (CV) applications. However, the extensive use of nonlinear operations, like softmax, poses a performance bottleneck during transformer inference and comprises up to 40% of the total latency. 
Hence, we propose innovations at the circuit, architecture, and al&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13050v1-abstract-full').style.display = 'inline'; document.getElementById('2411.13050v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.13050v1-abstract-full" style="display: none;"> Transformer model has gained prominence as a popular deep neural network architecture for neural language processing (NLP) and computer vision (CV) applications. However, the extensive use of nonlinear operations, like softmax, poses a performance bottleneck during transformer inference and comprises up to 40% of the total latency. Hence, we propose innovations at the circuit, architecture, and algorithm levels to accelerate the transformer. At the circuit level, we propose topkima-combining top-k activation selection with in-memory ADC (IMA) to implement a low-energy and low-latency softmax without any sorting latency. Only the k largest activations are sent to the softmax calculation block, reducing the huge computational cost of softmax. Using a modified training scheme with top-k only in the forward pass, experimental results demonstrate only a 0.4% to 1.2% reduction in accuracy across ViT, distilBERT, and BERT-base models when evaluated on CIFAR-10, CIFAR-100, and SQuAD datasets with k=5. At the architecture level, an improved scale-free technique is introduced to reduce the computational cost of attention. The combined system, dubbed Topkima-Former, enhances 1.8x-84x speedup and 1.3x-35x energy efficiency (EE) over prior In-memory computing (IMC) accelerators. Compared to a conventional softmax macro and a digital top-k (Dtopk) softmax macro, our proposed tokima softmax macro achieves about 15x and 8x faster speed respectively. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13050v1-abstract-full').style.display = 'none'; document.getElementById('2411.13050v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.13024">arXiv:2411.13024</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.13024">pdf</a>, <a href="https://arxiv.org/format/2411.13024">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Prior-based Objective Inference Mining Potential Uncertainty for Facial Expression Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Liu%2C+H">Hanwei Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Cai%2C+H">Huiling Cai</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+Q">Qingcheng Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+X">Xuefeng Li</a>, <a href="/search/cs?searchtype=author&amp;query=Xiao%2C+H">Hui Xiao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.13024v1-abstract-short" style="display: inline;"> Annotation ambiguity caused by the inherent subjectivity of visual judgment has always been a major challenge for Facial Expression Recognition (FER) tasks, particularly for largescale datasets from in-the-wild scenarios. A potential solution is the evaluation of relatively objective emotional distributions to help mitigate the ambiguity of subjective annotations. To this end, this paper proposes&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13024v1-abstract-full').style.display = 'inline'; document.getElementById('2411.13024v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.13024v1-abstract-full" style="display: none;"> Annotation ambiguity caused by the inherent subjectivity of visual judgment has always been a major challenge for Facial Expression Recognition (FER) tasks, particularly for largescale datasets from in-the-wild scenarios. A potential solution is the evaluation of relatively objective emotional distributions to help mitigate the ambiguity of subjective annotations. To this end, this paper proposes a novel Prior-based Objective Inference (POI) network. This network employs prior knowledge to derive a more objective and varied emotional distribution and tackles the issue of subjective annotation ambiguity through dynamic knowledge transfer. POI comprises two key networks: Firstly, the Prior Inference Network (PIN) utilizes the prior knowledge of AUs and emotions to capture intricate motion details. To reduce over-reliance on priors and facilitate objective emotional inference, PIN aggregates inferential knowledge from various key facial subregions, encouraging mutual learning. Secondly, the Target Recognition Network (TRN) integrates subjective emotion annotations and objective inference soft labels provided by the PIN, fostering an understanding of inherent facial expression diversity, thus resolving annotation ambiguity. Moreover, we introduce an uncertainty estimation module to quantify and balance facial expression confidence. 
5. arXiv:2411.13024 [pdf, other] (cs.CV)
   Title: Prior-based Objective Inference Mining Potential Uncertainty for Facial Expression Recognition
   Authors: Hanwei Liu, Huiling Cai, Qingcheng Lin, Xuefeng Li, Hui Xiao
   Abstract: Annotation ambiguity caused by the inherent subjectivity of visual judgment has always been a major challenge for facial expression recognition (FER) tasks, particularly for large-scale datasets from in-the-wild scenarios. A potential solution is the evaluation of relatively objective emotional distributions to help mitigate the ambiguity of subjective annotations. To this end, this paper proposes a novel Prior-based Objective Inference (POI) network. This network employs prior knowledge to derive a more objective and varied emotional distribution and tackles the issue of subjective annotation ambiguity through dynamic knowledge transfer. POI comprises two key networks. First, the Prior Inference Network (PIN) uses prior knowledge of AUs and emotions to capture intricate motion details. To reduce over-reliance on priors and facilitate objective emotional inference, PIN aggregates inferential knowledge from various key facial subregions, encouraging mutual learning. Second, the Target Recognition Network (TRN) integrates subjective emotion annotations and the objective inference soft labels provided by the PIN, fostering an understanding of inherent facial expression diversity and thus resolving annotation ambiguity. Moreover, we introduce an uncertainty estimation module to quantify and balance facial expression confidence. This module enables a flexible approach to dealing with the uncertainties of subjective annotations. Extensive experiments show that POI exhibits competitive performance on both synthetic noisy datasets and multiple real-world datasets. All code and training logs will be publicly available at https://github.com/liuhw01/POI.
   Submitted 19 November, 2024; originally announced November 2024.

6. arXiv:2411.13008 [pdf, other] (cs.LG; cs.AI)
   Title: Evaluating LLMs Capabilities Towards Understanding Social Dynamics
   Authors: Anique Tahir, Lu Cheng, Manuel Sandoval, Yasin N. Silva, Deborah L. Hall, Huan Liu
   Abstract: Social media discourse involves people from different backgrounds, beliefs, and motives, and such discourse can often devolve into toxic interactions. Generative models, such as Llama and ChatGPT, have recently exploded in popularity due to their capabilities in zero-shot question answering. Because these models are increasingly being used to ask questions of social significance, a crucial research question is whether they can understand social media dynamics. This work provides a critical analysis of generative LLMs' ability to understand language and dynamics in social contexts, particularly considering cyberbullying and anti-cyberbullying (posts aimed at reducing cyberbullying) interactions. Specifically, we compare and contrast the capabilities of different large language models (LLMs) to understand three key aspects of social dynamics: language, directionality, and the occurrence of bullying/anti-bullying messages. We found that while fine-tuned LLMs exhibit promising results in some social media understanding tasks (understanding directionality), they presented mixed results in others (proper paraphrasing and bullying/anti-bullying detection). We also found that fine-tuning and prompt engineering mechanisms can have positive effects on some tasks. We believe that an understanding of LLMs' capabilities is crucial to designing future models that can be effectively used in social applications.
   Submitted 19 November, 2024; originally announced November 2024.
   Comments: To appear in ASONAM 24 proceedings

7. arXiv:2411.12448 [pdf, other] (cs.CV; eess.IV)
   Title: Large Language Models for Lossless Image Compression: Next-Pixel Prediction in Language Space is All You Need
   Authors: Kecheng Chen, Pingping Zhang, Hui Liu, Jie Liu, Yibing Liu, Jixin Huang, Shiqi Wang, Hong Yan, Haoliang Li
   Abstract: We have recently witnessed that "intelligence" and "compression" are two sides of the same coin, where a large language model (LLM) with unprecedented intelligence is a general-purpose lossless compressor for various data modalities. This attribute particularly appeals to the lossless image compression community, given the increasing need to compress high-resolution images in the current streaming-media era. Consequently, a natural question emerges: can the compression performance of the LLM elevate lossless image compression to new heights? However, our findings indicate that the naive application of LLM-based lossless image compressors suffers from a considerable performance gap compared with existing state-of-the-art (SOTA) codecs on common benchmark datasets. In light of this, we are dedicated to fulfilling the unprecedented intelligence (compression) capacity of the LLM for lossless image compression tasks, thereby bridging the gap between theoretical and practical compression performance. Specifically, we propose P²-LLM, a next-pixel prediction-based LLM, which integrates various elaborated insights and methodologies, e.g., pixel-level priors, the in-context ability of the LLM, and a pixel-level semantic preservation strategy, to enhance the understanding capacity of pixel sequences for better next-pixel predictions. Extensive experiments on benchmark datasets demonstrate that P²-LLM can beat SOTA classical and learned codecs.
   Submitted 19 November, 2024; originally announced November 2024.
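The premise that an autoregressive next-pixel predictor doubles as a lossless compressor rests on the standard identity that a symbol assigned model probability p can be entropy-coded in about -log2 p bits. The sketch below only performs this bit accounting with a toy stand-in predictor; the predictor and the example scanline are purely illustrative and are not P²-LLM.

```python
import numpy as np

def ideal_code_length_bits(pixels, predict_next):
    """Total bits needed to losslessly code a pixel sequence when each pixel
    is entropy-coded under the model's predictive distribution."""
    total = 0.0
    for i, px in enumerate(pixels):
        probs = predict_next(pixels[:i])        # distribution over values 0..255
        total += -np.log2(probs[px] + 1e-12)
    return total

def toy_predictor(context):
    """Stand-in for an autoregressive model: peaks at the previous pixel value
    (images are locally smooth), otherwise uniform."""
    probs = np.full(256, 1.0 / 256)
    if len(context) > 0:
        probs *= 0.2
        probs[context[-1]] += 0.8
    return probs / probs.sum()

pixels = [120, 121, 121, 119, 200, 201]          # a tiny "scanline"
bits = ideal_code_length_bits(pixels, toy_predictor)
print(f"{bits:.1f} bits vs {8 * len(pixels)} bits for raw 8-bit storage")
```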
8. arXiv:2411.12395 [pdf, other] (cs.CL; cs.AI)
   Title: Do LLMs Understand Ambiguity in Text? A Case Study in Open-world Question Answering
   Authors: Aryan Keluskar, Amrita Bhattacharjee, Huan Liu
   Abstract: Ambiguity in natural language poses significant challenges to large language models (LLMs) used for open-domain question answering. LLMs often struggle with the inherent uncertainties of human communication, leading to misinterpretations, miscommunications, hallucinations, and biased responses. This significantly weakens their usefulness for tasks like fact-checking, question answering, feature extraction, and sentiment analysis. Using open-domain question answering as a test case, we compare off-the-shelf and few-shot LLM performance, focusing on measuring the impact of explicit disambiguation strategies. We demonstrate how simple, training-free, token-level disambiguation methods may be effectively used to improve LLM performance on ambiguous question answering tasks. We report our findings empirically and discuss best practices and broader impacts regarding ambiguity in LLMs.
   Submitted 19 November, 2024; originally announced November 2024.
   Comments: Accepted at the REU Symposium at IEEE BigData 2024

9. arXiv:2411.11016 [pdf, other] (cs.CV; cs.AI)
   Title: Time Step Generating: A Universal Synthesized Deepfake Image Detector
   Authors: Ziyue Zeng, Haoyuan Liu, Dingjie Peng, Luoxu Jing, Hiroshi Watanabe
   Abstract: High-fidelity text-to-image models are currently being developed at an accelerating pace. Among them, diffusion models have led to a remarkable improvement in the quality of image generation, making it very challenging to distinguish between real and synthesized images. This simultaneously raises serious concerns regarding privacy and security. Some methods have been proposed to distinguish diffusion-model-generated images through reconstruction. However, the inversion and denoising processes are time-consuming and rely heavily on the pre-trained generative model; consequently, when the pre-trained generative model encounters out-of-domain data, detection performance declines. To address this issue, we propose a universal synthetic-image detector, Time Step Generating (TSG), which does not rely on pre-trained models' reconstruction ability, specific datasets, or sampling algorithms. Our method uses a pre-trained diffusion model's network as a feature extractor to capture fine-grained details, focusing on the subtle differences between real and synthetic images. By controlling the time step t of the network input, we can effectively extract these distinguishing detail features. These features can then be passed through a classifier (i.e., ResNet), which efficiently detects whether an image is synthetic or real. We test the proposed TSG on the large-scale GenImage benchmark and it achieves significant improvements in both accuracy and generalizability.
   Submitted 19 November, 2024; v1 submitted 17 November, 2024; originally announced November 2024.
   Comments: 9 pages, 7 figures
   MSC Class: 62H30; 68T07. ACM Class: I.4.9; I.4.7; I.5.2
10. arXiv:2411.10830 [pdf, other] (cs.LG; cs.AI; math.OC)
    Title: One-Layer Transformer Provably Learns One-Nearest Neighbor In Context
    Authors: Zihao Li, Yuan Cao, Cheng Gao, Yihan He, Han Liu, Jason M. Klusowski, Jianqing Fan, Mengdi Wang
    Abstract: Transformers have achieved great success in recent years. Interestingly, transformers have shown particularly strong in-context learning capability -- even without fine-tuning, they are still able to solve unseen tasks well purely based on task-specific prompts. In this paper, we study the capability of one-layer transformers in learning one of the most classical nonparametric estimators, the one-nearest neighbor prediction rule. Under a theoretical framework where the prompt contains a sequence of labeled training data and unlabeled test data, we show that, although the loss function is nonconvex when trained with gradient descent, a single softmax attention layer can successfully learn to behave like a one-nearest neighbor classifier. Our result gives a concrete example of how transformers can be trained to implement nonparametric machine learning algorithms, and sheds light on the role of softmax attention in transformer models.
    Submitted 16 November, 2024; originally announced November 2024.
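The behavior this abstract points to can be illustrated numerically: when the query-key similarity of a single softmax attention layer is sharp enough, attention over the labeled prompt examples concentrates on the closest one, reproducing the one-nearest-neighbor prediction. The Gaussian data, labels, and inverse temperature beta below are illustrative assumptions, not the paper's construction or training procedure.

```python
import numpy as np

def attention_predict(x_train, y_train, x_test, beta=50.0):
    """One softmax attention step over the prompt: the test query attends to
    the training inputs (keys) and averages their labels (values)."""
    scores = -beta * ((x_train - x_test) ** 2).sum(axis=1)   # sharp similarity
    weights = np.exp(scores - scores.max())
    weights /= weights.sum()
    return weights @ y_train

def one_nearest_neighbor(x_train, y_train, x_test):
    return y_train[np.argmin(((x_train - x_test) ** 2).sum(axis=1))]

rng = np.random.default_rng(0)
x_train = rng.standard_normal((20, 2))
y_train = (x_train[:, 0] > 0).astype(float)      # labels of the prompt examples
x_test = rng.standard_normal(2)

print(attention_predict(x_train, y_train, x_test))   # ~= the 1-NN label for large beta
print(one_nearest_neighbor(x_train, y_train, x_test))
```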
arXiv:2411.09879 [pdf, other] (cs.HC)
A Multi-Label EEG Dataset for Mental Attention State Classification in Online Learning
Authors: Huan Liu, Yuzhe Zhang, Guanjian Liu, Xinxin Du, Haochong Wang, Dalin Zhang
Abstract: Attention is a vital cognitive process in the learning and memory environment, particularly in the context of online learning. Traditional methods for classifying attention states of online learners based on behavioral signals are prone to distortion, leading to increased interest in using electroencephalography (EEG) signals for authentic and accurate assessment. However, the field of attention state classification based on EEG signals in online learning faces challenges, including the scarcity of publicly available datasets, the lack of standardized data collection paradigms, and the requirement to consider the interplay between attention and other psychological states. In light of this, we present the Multi-label EEG dataset for classifying Mental Attention states (MEMA) in online learning. We meticulously designed a reliable and standard experimental paradigm with three attention states: neutral, relaxing, and concentrating, considering human physiological and psychological characteristics. This paradigm collected EEG signals from 20 subjects, each participating in 12 trials, resulting in 1,060 minutes of data. Emotional state labels, basic personal information, and personality traits were also collected to investigate the relationship between attention and other psychological states. Extensive quantitative and qualitative analysis, including a multi-label correlation study, validated the quality of the EEG attention data. The MEMA dataset and analysis provide valuable insights for advancing research on attention in online learning. The dataset is publicly available at https://github.com/GuanjianLiu/MEMA.
Submitted 14 November, 2024; originally announced November 2024.

arXiv:2411.08703 [pdf, other] (cs.LG, cs.AI)
MVKTrans: Multi-View Knowledge Transfer for Robust Multiomics Classification
Authors: Shan Cong, Zhiling Sang, Hongwei Liu, Haoran Luo, Xin Wang, Hong Liang, Jie Hao, Xiaohui Yao
Abstract: The distinct characteristics of multiomics data, including complex interactions within and across biological layers and disease heterogeneity (e.g., heterogeneity in etiology and clinical symptoms), drive us to develop novel designs to address unique challenges in multiomics prediction. In this paper, we propose the multi-view knowledge transfer learning (MVKTrans) framework, which transfers intra- and inter-omics knowledge in an adaptive manner by reviewing data heterogeneity and suppressing bias transfer, thereby enhancing classification performance. Specifically, we design a graph contrastive module that is trained on unlabeled data to effectively learn and transfer the underlying intra-omics patterns to the supervised task. This unsupervised pretraining promotes learning general and unbiased representations for each modality, regardless of the downstream tasks. In light of the varying discriminative capacities of modalities across different diseases and/or samples, we introduce an adaptive and bi-directional cross-omics distillation module. This module automatically identifies richer modalities and facilitates dynamic knowledge transfer from more informative to less informative omics, thereby enabling a more robust and generalized integration. Extensive experiments on four real biomedical datasets demonstrate the superior performance and robustness of MVKTrans compared to the state of the art. Code and data are available at https://github.com/Yaolab-fantastic/MVKTrans.
Submitted 13 November, 2024; originally announced November 2024.
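As a rough illustration of what an adaptive, bi-directional cross-omics distillation term could look like (the confidence-based weighting below is my assumption, not the MVKTrans formulation), each modality is distilled toward the other, and the direction whose teacher modality is more confident on the batch receives the larger weight:

    import torch
    import torch.nn.functional as F

    def adaptive_cross_distill(logits_a, logits_b, T=2.0):
        # Toy bi-directional distillation between two omics branches; the weighting is illustrative.
        def mean_entropy(logits):
            p = F.softmax(logits, dim=-1)
            return -(p * p.clamp_min(1e-12).log()).sum(-1).mean()
        # lower entropy = more confident; softmax over the two branches gives adaptive weights
        conf = torch.softmax(torch.stack([-mean_entropy(logits_a), -mean_entropy(logits_b)]), dim=0)
        kl_a_from_b = F.kl_div(F.log_softmax(logits_a / T, -1),
                               F.softmax(logits_b / T, -1).detach(), reduction="batchmean") * T * T
        kl_b_from_a = F.kl_div(F.log_softmax(logits_b / T, -1),
                               F.softmax(logits_a / T, -1).detach(), reduction="batchmean") * T * T
        # each direction is weighted by the confidence of its teacher modality
        return conf[1] * kl_a_from_b + conf[0] * kl_b_from_a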
arXiv:2411.08631 [pdf, other] (stat.ML, cs.LG, math.OC)
Deep Generative Demand Learning for Newsvendor and Pricing
Authors: Shijin Gong, Huihang Liu, Xinyu Zhang
Abstract: We consider data-driven inventory and pricing decisions in the feature-based newsvendor problem, where demand is influenced by both price and contextual features and is modeled without any structural assumptions. The unknown demand distribution results in a challenging conditional stochastic optimization problem, further complicated by decision-dependent uncertainty and the integration of features. Inspired by recent advances in deep generative learning, we propose a novel approach leveraging conditional deep generative models (cDGMs) to address these challenges. cDGMs learn the demand distribution and generate probabilistic demand forecasts conditioned on price and features. This generative approach enables accurate profit estimation and supports the design of algorithms for two key objectives: (1) optimizing inventory for arbitrary prices, and (2) jointly determining optimal pricing and inventory levels. We provide theoretical guarantees for our approach, including the consistency of profit estimation and convergence of our decisions to the optimal solution. Extensive simulations, ranging from simple to complex scenarios (including one involving textual features), and a real-world case study demonstrate the effectiveness of our approach. Our method opens a new paradigm in management science and operations research, is adaptable to extensions of the newsvendor and pricing problems, and holds potential for solving other conditional stochastic optimization problems.
Submitted 13 November, 2024; originally announced November 2024.
Comments: 30 pages, 6 figures
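A minimal sketch of how a trained conditional generator can drive the two decisions described above. Here sample_demand is a hypothetical stand-in for the paper's cDGM sampler; profit is estimated by Monte Carlo over generated demand scenarios, and the decisions are picked from simple grids:

    import numpy as np

    def expected_profit(demand, q, price, unit_cost):
        # classic newsvendor payoff: sell min(D, q) at `price`, pay `unit_cost` per unit ordered
        return np.mean(price * np.minimum(demand, q) - unit_cost * q)

    def best_inventory(sample_demand, x, price, unit_cost, q_grid, n=2000):
        demand = sample_demand(x, price, n)                 # n demand scenarios given features x and price
        return max(q_grid, key=lambda q: expected_profit(demand, q, price, unit_cost))

    def best_price_and_inventory(sample_demand, x, unit_cost, price_grid, q_grid, n=2000):
        best = None
        for price in price_grid:
            demand = sample_demand(x, price, n)             # demand is decision-dependent through price
            q = max(q_grid, key=lambda q: expected_profit(demand, q, price, unit_cost))
            cand = (expected_profit(demand, q, price, unit_cost), price, q)
            best = cand if best is None else max(best, cand)
        return best[1], best[2]                             # (price, order quantity) with highest estimated profit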
arXiv:2411.08158 [pdf] (eess.IV, cs.CV)
TomoGRAF: A Robust and Generalizable Reconstruction Network for Single-View Computed Tomography
Authors: Di Xu, Yang Yang, Hengjie Liu, Qihui Lyu, Martina Descovich, Dan Ruan, Ke Sheng
Abstract: Computed tomography (CT) provides high spatial resolution visualization of 3D structures for scientific and clinical applications. Traditional analytical/iterative CT reconstruction algorithms require hundreds of angular data samplings, a condition that may not be met in practice due to physical and mechanical limitations. Sparse-view CT reconstruction has been proposed using constrained optimization and machine learning methods with varying success, less so for ultra-sparse-view CT reconstruction with one to two views. The neural radiance field (NeRF) is a powerful tool for reconstructing and rendering 3D natural scenes from sparse views, but its direct application to 3D medical image reconstruction has been minimally successful due to the differences between optical and X-ray photon transportation. Here, we develop a novel TomoGRAF framework incorporating the unique X-ray transportation physics to reconstruct high-quality 3D volumes using ultra-sparse projections without prior. TomoGRAF captures the CT imaging geometry, simulates the X-ray casting and tracing process, and penalizes the difference between the simulated and ground truth CT sub-volume during training. We evaluated the performance of TomoGRAF on an unseen dataset with imaging characteristics distinct from the training data and demonstrated a vast leap in performance compared with state-of-the-art deep learning and NeRF methods. TomoGRAF provides the first generalizable solution for image-guided radiotherapy and interventional radiology applications, where only one or a few X-ray views are available but 3D volumetric information is desired.
Submitted 12 November, 2024; originally announced November 2024.

arXiv:2411.08028 [pdf, other] (cs.AI)
Learning with Less: Knowledge Distillation from Large Language Models via Unlabeled Data
Authors: Juanhui Li, Sreyashi Nag, Hui Liu, Xianfeng Tang, Sheikh Sarwar, Limeng Cui, Hansu Gu, Suhang Wang, Qi He, Jiliang Tang
Abstract: In real-world NLP applications, Large Language Models (LLMs) offer promising solutions due to their extensive training on vast datasets. However, the large size and high computation demands of LLMs limit their practicality in many applications, especially when further fine-tuning is required. To address these limitations, smaller models are typically preferred for deployment. However, their training is hindered by the scarcity of labeled data. In contrast, unlabeled data is often readily available and can be leveraged by using LLMs to generate pseudo-labels for training smaller models. This enables the smaller models (students) to acquire knowledge from LLMs (teachers) while reducing computational costs. This process introduces challenges, such as potentially noisy pseudo-labels. Selecting high-quality and informative data is therefore critical to enhance model performance while improving the efficiency of data utilization. To address this, we propose LLKD, which enables Learning with Less computational resources and less data for Knowledge Distillation from LLMs. LLKD is an adaptive sample selection method that incorporates signals from both the teacher and the student. Specifically, it prioritizes samples where the teacher demonstrates high confidence in its labeling, indicating reliable labels, and where the student exhibits a high information need, identifying challenging samples that require further learning. Our comprehensive experiments show that LLKD achieves superior performance across various datasets with higher data efficiency.
Submitted 12 November, 2024; originally announced November 2024.
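A toy version of that selection rule, assuming access to teacher and student logits for a pool of pseudo-labeled samples; the exact scoring and thresholding used by LLKD may differ:

    import torch
    import torch.nn.functional as F

    def select_for_distillation(teacher_logits, student_logits, k):
        # teacher confidence: probability of the pseudo-label (proxy for label reliability)
        teacher_conf = F.softmax(teacher_logits, dim=-1).max(dim=-1).values
        # student information need: predictive entropy (proxy for how much the sample would teach)
        p = F.softmax(student_logits, dim=-1)
        student_entropy = -(p * p.clamp_min(1e-12).log()).sum(dim=-1)
        score = teacher_conf * student_entropy        # confident teacher AND uncertain student
        return score.topk(k).indices                  # indices of the samples kept for training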
arXiv:2411.07751 [pdf, other] (cs.SD, cs.AI, cs.CV, cs.MM, eess.AS)
SAV-SE: Scene-aware Audio-Visual Speech Enhancement with Selective State Space Model
Authors: Xinyuan Qian, Jiaran Gao, Yaodan Zhang, Qiquan Zhang, Hexin Liu, Leibny Paola Garcia, Haizhou Li
Abstract: Speech enhancement plays an essential role in various applications, and the integration of visual information has been demonstrated to bring substantial advantages. However, the majority of current research concentrates on the examination of facial and lip movements, which can be compromised or entirely inaccessible in scenarios where occlusions occur or when the camera view is distant. Contextual visual cues from the surrounding environment, by contrast, have been overlooked: for example, when we see a dog bark, our brain has the innate ability to discern and filter out the barking noise. To this end, in this paper, we introduce a novel task, i.e., SAV-SE. To our best knowledge, this is the first proposal to use rich contextual information from synchronized video as auxiliary cues to indicate the type of noise, which eventually improves the speech enhancement performance. Specifically, we propose the VC-S$^2$E method, which incorporates the Conformer and Mamba modules for their complementary strengths. Extensive experiments are conducted on the public MUSIC, AVSpeech and AudioSet datasets, where the results demonstrate the superiority of VC-S$^2$E over other competitive methods. We will make the source code publicly available. Project demo page: https://AVSEPage.github.io/
Submitted 12 November, 2024; originally announced November 2024.

arXiv:2411.07588 [pdf] (cs.RO)
A High-frequency Pneumatic Oscillator for Soft Robotics
Authors: Longchuan Li, Shuqian He, Qiukai Qi, Ye Cui, Cong Yan, Kaige Jiang, Shuai Kang, Isao T. Tokuda, Zhongkui Wang, Shugen Ma, Huaping Liu
Abstract: Soft robots, while highly adaptable to diverse environments through various actuation methods, still face significant performance boundaries due to the inherent properties of materials. These limitations manifest in the challenge of guaranteeing rapid response and large-scale movements simultaneously, ultimately restricting the robots' absolute speed and overall efficiency. In this paper, we introduce a high-frequency pneumatic oscillator (HIPO) to overcome these challenges. Through a collision-induced phase resetting mechanism, our HIPO leverages event-based nonlinearity to trigger self-oscillation of the pneumatic actuator, which positively utilizes the intrinsic characteristics of materials. This enables the system to spontaneously generate periodic control signals and directly produce motion responses, eliminating the need for incorporating external actuation components. By efficiently and rapidly converting the internal energy of airflow into the kinetic energy of robots, HIPO achieves a frequency of up to 20 Hz. Furthermore, we demonstrate the versatility and high-performance capabilities of HIPO through bio-inspired robots: an insect-like fast-crawler (with speeds up to 50.27 cm/s), a high-frequency butterfly-like wing-flapper, and a maneuverable duck-like swimmer. By eliminating external components and seamlessly fusing signal generation, energy conversion, and motion output, HIPO unleashes rapid and efficient motion, unlocking potential for high-performance soft robotics.
Submitted 12 November, 2024; originally announced November 2024.
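Collision-induced phase resetting can be illustrated with a toy lumped model (entirely my own simplification, not the authors' dynamics): a spring-returned piston whose inlet valve flips every time the piston hits an end stop, so the mechanical collision event re-synchronizes the pressure oscillation and the system self-oscillates without any external periodic command:

    import numpy as np

    dt, T = 1e-4, 0.5
    m, c, k = 0.01, 0.05, 50.0        # piston mass (kg), damping, return-spring stiffness (N/m)
    L, A = 0.02, 1e-4                 # stroke length (m), piston area (m^2)
    p_in, tau = 2e4, 0.002            # supply gauge pressure (Pa), chamber filling time constant (s)

    x, v, p, valve = 0.0, 0.0, 0.0, +1    # valve +1: filling, -1: venting
    hits = []                              # times of collisions with the upper stop (one per cycle)
    for step in range(int(T / dt)):
        p += ((p_in if valve > 0 else 0.0) - p) / tau * dt   # first-order chamber pressure dynamics
        v += (p * A - k * x - c * v) / m * dt                # pressure force vs spring and damping
        x += v * dt
        if x >= L:
            x, v = L, min(v, 0.0)
            if valve > 0:              # collision while filling: phase reset, start venting
                valve = -1
                hits.append(step * dt)
        elif x <= 0.0:
            x, v = 0.0, max(v, 0.0)
            if valve < 0:              # collision while venting: phase reset, start refilling
                valve = +1

    print(f"self-oscillation at roughly {len(hits) / T:.1f} Hz")   # tens of Hz for these toy parameters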
arXiv:2411.07112 [pdf, other] (cs.SE)
ROCODE: Integrating Backtracking Mechanism and Program Analysis in Large Language Models for Code Generation
Authors: Xue Jiang, Yihong Dong, Yongding Tao, Huanyu Liu, Zhi Jin, Wenpin Jiao, Ge Li
Abstract: Large language models (LLMs) have achieved impressive performance in code generation recently, offering programmers revolutionary assistance in software development. However, due to the auto-regressive nature of LLMs, they are susceptible to error accumulation during code generation. Once an error is produced, LLMs can merely continue to generate the subsequent code conditioned on it, given their inability to adjust previous outputs. Existing LLM-based approaches typically consider post-revising after code generation, leading to the challenging resolution of accumulated errors and the significant wastage of resources. Ideally, LLMs should roll back and resolve the error in time during code generation, rather than proceed on the basis of the error and wait for post-revising after generation. In this paper, we propose ROCODE, which integrates the backtracking mechanism and program analysis into LLMs for code generation. Specifically, we employ program analysis to perform incremental error detection during the generation process. When an error is detected, the backtracking mechanism is triggered to prime rollback strategies and constraint regeneration, thereby eliminating the error early and ensuring continued generation on the correct basis. Experiments on multiple code generation benchmarks show that ROCODE can significantly reduce the errors generated by LLMs, with a compilation pass rate of 99.1%. The test pass rate is improved by up to 23.8% compared to the best baseline approach. Compared to the post-revising baseline, the token cost is reduced by 19.3%. Moreover, our approach is model-agnostic and achieves consistent improvements across nine representative LLMs.
Submitted 11 November, 2024; originally announced November 2024.
Comments: ICSE 2025
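The generate-check-rollback control flow can be sketched as follows. Here generate_line is a hypothetical stand-in for one LLM decoding step, and a cheap syntax probe replaces the program analysis ROCODE actually uses, so this only conveys the shape of the loop, not the paper's method:

    import ast

    def prefix_is_plausible(code: str) -> bool:
        # Heuristic: accept prefixes that parse, or that fail only because the program is still incomplete.
        try:
            ast.parse(code)
            return True
        except SyntaxError as err:
            msg = str(err)
            return ("unexpected EOF" in msg or "was never closed" in msg
                    or "expected an indented block" in msg)

    def generate_with_rollback(generate_line, prompt, max_lines=30, max_retries=3):
        lines, retries = [], 0
        while len(lines) < max_lines:
            nxt = generate_line(prompt, lines)      # stand-in for sampling the next code line
            if nxt is None:                         # generator signals completion
                break
            if prefix_is_plausible("\n".join(lines + [nxt])):
                lines.append(nxt)
                retries = 0
            else:
                retries += 1                        # reject the faulty line and resample
                if retries > max_retries and lines:
                    lines.pop()                     # backtrack: drop the previous line as well
                    retries = 0
        return "\n".join(lines)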
arXiv:2411.06739 [pdf, other] (cs.LG)
Beating Adversarial Low-Rank MDPs with Unknown Transition and Bandit Feedback
Authors: Haolin Liu, Zakaria Mhammedi, Chen-Yu Wei, Julian Zimmert
Abstract: We consider regret minimization in low-rank MDPs with fixed transition and adversarial losses. Previous work has investigated this problem under either full-information loss feedback with unknown transitions (Zhao et al., 2024), or bandit loss feedback with known transition (Foster et al., 2022). First, we improve the $\mathrm{poly}(d, A, H)T^{5/6}$ regret bound of Zhao et al. (2024) to $\mathrm{poly}(d, A, H)T^{2/3}$ for the full-information unknown-transition setting, where $d$ is the rank of the transitions, $A$ is the number of actions, $H$ is the horizon length, and $T$ is the number of episodes. Next, we initiate the study of the setting with bandit loss feedback and unknown transitions. Assuming that the loss has a linear structure, we propose both model-based and model-free algorithms achieving $\mathrm{poly}(d, A, H)T^{2/3}$ regret, though they are computationally inefficient. We also propose oracle-efficient model-free algorithms with $\mathrm{poly}(d, A, H)T^{4/5}$ regret. We show that the linear structure is necessary for the bandit case: without structure on the reward function, the regret has to scale polynomially with the number of states. This is contrary to the full-information case (Zhao et al., 2024), where the regret can be independent of the number of states even for unstructured reward functions.
Submitted 11 November, 2024; originally announced November 2024.
Comments: NeurIPS 2024

arXiv:2411.06727 [pdf, other] (cs.CV)
Can KAN Work? Exploring the Potential of Kolmogorov-Arnold Networks in Computer Vision
Authors: Yueyang Cang, Yu hang liu, Li Shi
Abstract: Kolmogorov-Arnold Networks (KANs), as a theoretically efficient neural network architecture, have garnered attention for their potential in capturing complex patterns. However, their application in computer vision remains relatively unexplored. This study first analyzes the potential of KAN in computer vision tasks, evaluating the performance of KAN and its convolutional variants in image classification and semantic segmentation. The focus is placed on examining their characteristics across varying data scales and noise levels. Results indicate that while KAN exhibits stronger fitting capabilities, it is highly sensitive to noise, limiting its robustness. To address this challenge, we propose a smoothness regularization method and introduce a Segment Deactivation technique. Both approaches enhance KAN's stability and generalization, demonstrating its potential in handling complex visual data tasks.
Submitted 13 November, 2024; v1 submitted 11 November, 2024; originally announced November 2024.
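The abstract does not spell out the regularizer, so the following is only one plausible reading of a "smoothness regularization" term for KAN-style learnable activations: penalize second differences of each edge's spline coefficients so that neighboring knots cannot fit high-frequency noise. The attribute name in the usage comment is hypothetical.

    import torch

    def spline_smoothness_penalty(coeffs: torch.Tensor, weight: float = 1e-3) -> torch.Tensor:
        # coeffs: (..., n_knots) learnable coefficients of a KAN edge activation.
        # Squared second differences discourage jagged, noise-fitting splines.
        second_diff = coeffs[..., 2:] - 2.0 * coeffs[..., 1:-1] + coeffs[..., :-2]
        return weight * second_diff.pow(2).mean()

    # usage sketch: loss = task_loss + spline_smoothness_penalty(layer.spline_coeffs)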
arXiv:2411.06655 [pdf, other] (cs.CL, cs.AI)
Explore the Reasoning Capability of LLMs in the Chess Testbed
Authors: Shu Wang, Lei Ji, Renxi Wang, Wenxiao Zhao, Haokun Liu, Yifan Hou, Ying Nian Wu
Abstract: Reasoning is a central capability of human intelligence. In recent years, with the advent of large-scale datasets, pretrained large language models have emerged with new capabilities, including reasoning. However, these models still struggle with long-term, complex reasoning tasks, such as playing chess. Based on the observation that expert chess players employ a dual approach combining long-term strategic play with short-term tactical play along with language explanation, we propose improving the reasoning capability of large language models in chess by integrating annotated strategy and tactics. Specifically, we collect a dataset named MATE, which consists of 1 million chess positions with candidate moves annotated by chess experts for strategy and tactics. We finetune the LLaMA-3-8B model and compare it against state-of-the-art commercial language models in the task of selecting better chess moves. Our experiments show that our models perform better than GPT, Claude, and Gemini models. We find that language explanations can enhance the reasoning capability of large language models.
Submitted 10 November, 2024; originally announced November 2024.
Comments: submitted to NAACL 2025

arXiv:2411.06184 [pdf, other] (eess.IV, cs.CV, cs.LG, stat.ML)
Alleviating Hyperparameter-Tuning Burden in SVM Classifiers for Pulmonary Nodules Diagnosis with Multi-Task Bayesian Optimization
Authors: Wenhao Chi, Haiping Liu, Hongqiao Dong, Wenhua Liang, Bo Liu
Abstract: In the field of non-invasive medical imaging, radiomic features are utilized to measure tumor characteristics. However, these features can be affected by the techniques used to discretize the images, ultimately impacting the accuracy of diagnosis. To investigate the influence of various image discretization methods on diagnosis, it is common practice to evaluate multiple discretization strategies individually. This approach often leads to redundant and time-consuming tasks such as training predictive models and fine-tuning hyperparameters separately. This study examines the feasibility of employing multi-task Bayesian optimization to accelerate the hyperparameter search for classifying benign and malignant pulmonary nodules using RBF SVM. Our findings suggest that multi-task Bayesian optimization significantly accelerates the search for hyperparameters in comparison to a single-task approach. To the best of our knowledge, this is the first investigation to utilize multi-task Bayesian optimization in a critical medical context.
Submitted 9 November, 2024; originally announced November 2024.
Comments: 12 pages, 4 figures, 37 references
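A compact sketch of the idea, under the assumption that each image-discretization setting is treated as a task and a single Gaussian process over (hyperparameters, task one-hot) shares information across tasks; the acquisition step then proposes RBF-SVM hyperparameters for one target task by expected improvement. This is generic multi-task Bayesian optimization, not necessarily the authors' exact formulation.

    import numpy as np
    from scipy.stats import norm
    from sklearn.gaussian_process import GaussianProcessRegressor
    from sklearn.gaussian_process.kernels import Matern

    def propose_next(obs_X, obs_y, task_id, n_tasks, n_candidates=500, seed=0):
        # obs_X rows: [log10 C, log10 gamma, one-hot task]; obs_y: validation AUC (higher is better)
        gp = GaussianProcessRegressor(kernel=Matern(nu=2.5), normalize_y=True)
        gp.fit(obs_X, obs_y)                                  # one surrogate shared by all tasks
        rng = np.random.default_rng(seed)
        cand = np.column_stack([rng.uniform(-2, 4, n_candidates),    # log10 C
                                rng.uniform(-5, 1, n_candidates)])   # log10 gamma
        onehot = np.zeros((n_candidates, n_tasks)); onehot[:, task_id] = 1.0
        mu, sigma = gp.predict(np.hstack([cand, onehot]), return_std=True)
        mask = obs_X[:, 2 + task_id] == 1.0
        best = obs_y[mask].max() if mask.any() else obs_y.max()
        z = (mu - best) / np.maximum(sigma, 1e-9)
        ei = (mu - best) * norm.cdf(z) + sigma * norm.pdf(z)   # expected improvement (maximization)
        return cand[int(np.argmax(ei))]                        # next (log10 C, log10 gamma) to evaluate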
arXiv:2411.06018 [pdf, other] (cs.LG, cs.AI)
A Picture is Worth A Thousand Numbers: Enabling LLMs Reason about Time Series via Visualization
Authors: Haoxin Liu, Chenghao Liu, B. Aditya Prakash
Abstract: Large language models (LLMs), with demonstrated reasoning abilities across multiple domains, are largely underexplored for time-series reasoning (TsR), which is ubiquitous in the real world. In this work, we propose TimerBed, the first comprehensive testbed for evaluating LLMs' TsR performance. Specifically, TimerBed includes stratified reasoning patterns with real-world tasks, comprehensive combinations of LLMs and reasoning strategies, and various supervised models as comparison anchors. We perform extensive experiments with TimerBed, test multiple current beliefs, and verify the initial failures of LLMs in TsR, evidenced by the ineffectiveness of zero-shot (ZST) prompting and the performance degradation of few-shot in-context learning (ICL). Further, we identify one possible root cause: the numerical modeling of data. To address this, we propose a prompt-based solution, VL-Time, which uses visualization-modeled data and language-guided reasoning. Experimental results demonstrate that VL-Time enables multimodal LLMs to be non-trivial ZST and powerful ICL reasoners for time series, achieving about a 140% average performance improvement and a 99% average reduction in token costs.
Submitted 8 November, 2024; originally announced November 2024.
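In practice, visualization-modeled input can be as simple as rendering the series to an image and prompting a multimodal model with it. The plotting choices and prompt below are illustrative only, and the actual LLM call is deployment-specific, so it is left as a placeholder:

    import base64, io
    import numpy as np
    import matplotlib
    matplotlib.use("Agg")
    import matplotlib.pyplot as plt

    def series_to_png_b64(values, title="query series"):
        # Render the raw numbers as a line plot, the kind of image-based input VL-Time argues for.
        fig, ax = plt.subplots(figsize=(6, 2.5))
        ax.plot(np.arange(len(values)), values)
        ax.set_title(title); ax.set_xlabel("t"); ax.set_ylabel("value")
        buf = io.BytesIO()
        fig.savefig(buf, format="png", bbox_inches="tight")
        plt.close(fig)
        return base64.b64encode(buf.getvalue()).decode()

    series = np.sin(np.linspace(0, 12, 200)) + 0.1 * np.random.default_rng(0).normal(size=200)
    image_b64 = series_to_png_b64(series)
    prompt = ("The attached plot shows one time series. Describe its trend, seasonality and any "
              "anomalies step by step, then answer the task question.")
    # `prompt` and `image_b64` would then be sent to whichever multimodal LLM API is available.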
Analogously, in order to achieve embodied intelligence in autonomous driving, it is essential to integrate multidimensional sensory information in order to facilitate interaction with the environment. However, the current multi-modal fusion sensing schemes often neglect these additional sensory inputs, hindering the realization of fully autonomous driving. This paper considers multi-sensory information and proposes a multi-modal interactive perception dataset named MIPD, enabling expanding the current autonomous driving algorithm framework, for supporting the research on embodied intelligent driving. In addition to the conventional camera, lidar, and 4D radar data, our dataset incorporates multiple sensor inputs including sound, light intensity, vibration intensity and vehicle speed to enrich the dataset comprehensiveness. Comprising 126 consecutive sequences, many exceeding twenty seconds, MIPD features over 8,500 meticulously synchronized and annotated frames. Moreover, it encompasses many challenging scenarios, covering various road and lighting conditions. The dataset has undergone thorough experimental validation, producing valuable insights for the exploration of next-generation autonomous driving frameworks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05881v1-abstract-full').style.display = 'none'; document.getElementById('2411.05881v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Data, development kit and more details will be available at https://github.com/BUCT-IUSRC/Dataset MIPD</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.05764">arXiv:2411.05764</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.05764">pdf</a>, <a href="https://arxiv.org/format/2411.05764">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> FinDVer: Explainable Claim Verification over Long and Hybrid-Content Financial Documents </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+Y">Yilun Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Long%2C+Y">Yitao Long</a>, <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+Y">Yuru Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+C">Chengye Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+W">Weiyuan Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+H">Hongjun Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yiming Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Tang%2C+X">Xiangru Tang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+C">Chen Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Cohan%2C+A">Arman Cohan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.05764v1-abstract-short" style="display: inline;"> We introduce FinDVer, a comprehensive benchmark specifically designed to evaluate the explainable claim verification capabilities of LLMs in the context of understanding and analyzing long, hybrid-content financial documents. FinDVer contains 2,400 expert-annotated examples, divided into three subsets: information extraction, numerical reasoning, and knowledge-intensive reasoning, each addressing&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05764v1-abstract-full').style.display = 'inline'; document.getElementById('2411.05764v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.05764v1-abstract-full" style="display: none;"> We introduce FinDVer, a comprehensive benchmark specifically designed to evaluate the explainable claim verification capabilities of LLMs in the context of understanding and analyzing long, hybrid-content financial documents. FinDVer contains 2,400 expert-annotated examples, divided into three subsets: information extraction, numerical reasoning, and knowledge-intensive reasoning, each addressing common scenarios encountered in real-world financial contexts. We assess a broad spectrum of LLMs under long-context and RAG settings. Our results show that even the current best-performing system, GPT-4o, still lags behind human experts. We further provide in-depth analysis on long-context and RAG setting, Chain-of-Thought reasoning, and model reasoning errors, offering insights to drive future advancements. We believe that FinDVer can serve as a valuable benchmark for evaluating LLMs in claim verification over complex, expert-domain documents. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05764v1-abstract-full').style.display = 'none'; document.getElementById('2411.05764v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
Comments: EMNLP 2024

arXiv:2411.05750 (https://arxiv.org/abs/2411.05750) [pdf, ps, other] cs.DS cs.AI cs.CR cs.LG stat.ML
On Differentially Private String Distances
Authors: Jerry Yao-Chieh Hu, Erzhi Liu, Han Liu, Zhao Song, Lichen Zhang
Abstract: Given a database of bit strings $A_1,\ldots,A_m\in \{0,1\}^n$, a fundamental data structure task is to estimate the distances between a given query $B\in \{0,1\}^n$ and all the strings in the database. In addition, one might further want to ensure the integrity of the database by releasing these distance statistics in a secure manner. In this work, we propose differentially private (DP) data structures for this type of task, with a focus on Hamming and edit distance. On top of the strong privacy guarantees, our data structures are also time- and space-efficient. In particular, our data structure is $\epsilon$-DP against any sequence of queries of arbitrary length, and for any query $B$ such that the maximum distance to any string in the database is at most $k$, we output $m$ distance estimates.
Moreover:
- For Hamming distance, our data structure answers any query in $\widetilde O(mk+n)$ time, and each estimate deviates from the true distance by at most $\widetilde O(k/e^{\epsilon/\log k})$;
- For edit distance, our data structure answers any query in $\widetilde O(mk^2+n)$ time, and each estimate deviates from the true distance by at most $\widetilde O(k/e^{\epsilon/(\log k \log n)})$.
For moderate $k$, both data structures support sublinear query operations. We obtain these results via a novel adaptation of the randomized response technique as a bit flipping procedure, applied to the sketched strings.
Submitted 8 November, 2024; originally announced November 2024.
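
The randomized-response idea mentioned in this abstract can be illustrated with a minimal Python sketch: flip each bit once with a calibrated probability, release the noisy copies, and debias the observed Hamming distance at query time. The function names, the direct (unsketched) treatment of the strings, and the debiasing step are illustrative assumptions, not the paper's actual data structure.

import math
import random

def randomized_response(bits, epsilon):
    # Keep each bit with probability e^eps / (1 + e^eps), otherwise flip it;
    # this makes the per-bit release eps-differentially private.
    p_keep = math.exp(epsilon) / (1.0 + math.exp(epsilon))
    return [b if random.random() < p_keep else 1 - b for b in bits]

def hamming(a, b):
    return sum(x != y for x, y in zip(a, b))

def debiased_hamming(noisy_entry, query, epsilon):
    # For true distance d, E[observed] = p*d + (1-p)*(n-d); invert that relation.
    n = len(query)
    p = math.exp(epsilon) / (1.0 + math.exp(epsilon))
    observed = hamming(noisy_entry, query)
    return (observed - (1.0 - p) * n) / (2.0 * p - 1.0)

# Toy usage: release noisy copies once, then answer distance queries from them.
database = [[random.randint(0, 1) for _ in range(64)] for _ in range(5)]
noisy_db = [randomized_response(s, epsilon=2.0) for s in database]
query = [random.randint(0, 1) for _ in range(64)]
estimates = [debiased_hamming(s, query, epsilon=2.0) for s in noisy_db]

Because the noisy copies are released once, the post-processing property of differential privacy lets every subsequent query reuse them at no extra privacy cost, which is the intuition behind answering arbitrarily long query sequences.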

arXiv:2411.04905 (https://arxiv.org/abs/2411.04905) [pdf, other] cs.CL cs.PL
OpenCoder: The Open Cookbook for Top-Tier Code Large Language Models
Authors: Siming Huang, Tianhao Cheng, J. K. Liu, Jiaran Hao, Liuyihan Song, Yang Xu, J. Yang, J. H. Liu, Chenchen Zhang, Linzheng Chai, Ruifeng Yuan, Zhaoxiang Zhang, Jie Fu, Qian Liu, Ge Zhang, Zili Wang, Yuan Qi, Yinghui Xu, Wei Chu
Abstract: Large language models (LLMs) for code have become indispensable in various domains, including code generation, reasoning tasks, and agent systems. While open-access code LLMs are increasingly approaching the performance levels of proprietary models, high-quality code LLMs suitable for rigorous scientific investigation, particularly those with reproducible data processing pipelines and transparent training protocols, remain limited. The scarcity is due to various challenges, including resource constraints, ethical considerations, and the competitive advantages of keeping models advanced. To address the gap, we introduce OpenCoder, a top-tier code LLM that not only achieves performance comparable to leading models but also serves as an "open cookbook" for the research community. Unlike most prior efforts, we release not only model weights and inference code, but also the reproducible training data, complete data processing pipeline, rigorous experimental ablation results, and detailed training protocols for open scientific research. Through this comprehensive release, we identify the key ingredients for building a top-tier code LLM: (1) code-optimized heuristic rules for data cleaning and methods for data deduplication, (2) recall of text corpora related to code, and (3) high-quality synthetic data in both annealing and supervised fine-tuning stages. By offering this level of openness, we aim to broaden access to all aspects of a top-tier code LLM, with OpenCoder serving as both a powerful model and an open foundation to accelerate research and enable reproducible advancements in code AI.
Submitted 9 November, 2024; v1 submitted 7 November, 2024; originally announced November 2024.

arXiv:2411.04826 (https://arxiv.org/abs/2411.04826) [pdf, other] cs.CV cs.AI cs.LG
D$^3$epth: Self-Supervised Depth Estimation with Dynamic Mask in Dynamic Scenes
Authors: Siyu Chen, Hong Liu, Wenhao Li, Ying Zhu, Guoquan Wang, Jianbing Wu
Abstract: Depth estimation is a crucial technology in robotics. Recently, self-supervised depth estimation methods have demonstrated great potential as they can efficiently leverage large amounts of unlabelled real-world data. However, most existing methods are designed under the assumption of static scenes, which hinders their adaptability in dynamic environments. To address this issue, we present D$^3$epth, a novel method for self-supervised depth estimation in dynamic scenes. It tackles the challenge of dynamic objects from two key perspectives.
First, within the self-supervised framework, we design a reprojection constraint to identify regions likely to contain dynamic objects, allowing the construction of a dynamic mask that mitigates their impact at the loss level. Second, for multi-frame depth estimation, we introduce a cost volume auto-masking strategy that leverages adjacent frames to identify regions associated with dynamic objects and generate corresponding masks. This provides guidance for subsequent processes. Furthermore, we propose a spectral entropy uncertainty module that incorporates spectral entropy to guide uncertainty estimation during depth fusion, effectively addressing issues arising from cost volume computation in dynamic environments. Extensive experiments on the KITTI and Cityscapes datasets demonstrate that the proposed method consistently outperforms existing self-supervised monocular depth estimation baselines. Code is available at https://github.com/Csyunling/D3epth.
Submitted 7 November, 2024; originally announced November 2024.
Comments: Open sourced
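
As a rough illustration of the reprojection-based masking idea described above (not the paper's exact rule), a self-supervised depth pipeline can flag pixels where warping the source frame into the target view fails to reduce photometric error, a common symptom of moving objects. The function name, the comparison against the unwarped error, and the margin are assumptions of this sketch.

import numpy as np

def dynamic_mask_from_reprojection(err_warped, err_identity, margin=0.0):
    # err_warped:   per-pixel photometric error of the source frame warped by the
    #               predicted depth and pose (H x W array)
    # err_identity: per-pixel photometric error of the unwarped source frame
    # Pixels where warping does not help are treated as likely dynamic and masked out.
    likely_dynamic = err_warped + margin >= err_identity
    return (~likely_dynamic).astype(np.float32)  # 1 = keep in the loss, 0 = ignore

# Toy usage: down-weight the photometric loss at suspected dynamic pixels.
H, W = 192, 640
err_warped = np.random.rand(H, W).astype(np.float32)
err_identity = np.random.rand(H, W).astype(np.float32)
mask = dynamic_mask_from_reprojection(err_warped, err_identity)
masked_loss = float((mask * err_warped).sum() / max(mask.sum(), 1.0))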

arXiv:2411.04444 (https://arxiv.org/abs/2411.04444) [pdf, other] cs.SE
An Empirical Study on the Potential of LLMs in Automated Software Refactoring
Authors: Bo Liu, Yanjie Jiang, Yuxia Zhang, Nan Niu, Guangjie Li, Hui Liu
Abstract: Recent advances in large language models (LLMs) make it potentially feasible to automatically refactor source code with LLMs. However, it remains unclear how well LLMs perform compared to human experts in conducting refactorings automatically and accurately. To fill this gap, in this paper we conduct an empirical study to investigate the potential of LLMs in automated software refactoring, focusing on the identification of refactoring opportunities and the recommendation of refactoring solutions. We first construct a high-quality refactoring dataset comprising 180 real-world refactorings from 20 projects, and conduct the empirical study on this dataset. With the to-be-refactored Java documents as input, ChatGPT and Gemini identified only 28 and 7, respectively, of the 180 refactoring opportunities. However, explaining the expected refactoring subcategories and narrowing the search space in the prompts substantially increased the success rate of ChatGPT from 15.6% to 86.7%. Concerning the recommendation of refactoring solutions, ChatGPT recommended 176 refactoring solutions for the 180 refactorings, and 63.6% of the recommended solutions were comparable to (or even better than) those constructed by human experts. However, 13 of the 176 solutions suggested by ChatGPT and 9 of the 137 solutions suggested by Gemini were unsafe in that they either changed the functionality of the source code or introduced syntax errors, which indicates the risk of LLM-based refactoring. To this end, we propose a detect-and-reapply tactic, called RefactoringMirror, to avoid such unsafe refactorings. By reapplying the identified refactorings to the original code using thoroughly tested refactoring engines, we can effectively mitigate the risks associated with LLM-based automated refactoring while still leveraging the LLM's intelligence to obtain valuable refactoring recommendations.
Submitted 7 November, 2024; originally announced November 2024.

arXiv:2411.04404 (https://arxiv.org/abs/2411.04404) [pdf, other] eess.IV cs.CV
Enhancing Bronchoscopy Depth Estimation through Synthetic-to-Real Domain Adaptation
Authors: Qingyao Tian, Huai Liao, Xinyan Huang, Lujie Li, Hongbin Liu
Abstract: Monocular depth estimation has shown promise in general imaging tasks, aiding in localization and 3D reconstruction. While effective in various domains, its application to bronchoscopic images is hindered by the lack of labeled data, challenging the use of supervised learning methods. In this work, we propose a transfer learning framework that leverages synthetic data with depth labels for training and adapts domain knowledge for accurate depth estimation in real bronchoscope data. Our network demonstrates improved depth prediction on real footage using domain adaptation compared to training solely on synthetic data, validating our approach.
Submitted 6 November, 2024; originally announced November 2024.

arXiv:2411.03914 (https://arxiv.org/abs/2411.03914) [pdf, other] cs.CY cs.LG
Game-Theoretic Machine Unlearning: Mitigating Extra Privacy Leakage
Authors: Hengzhu Liu, Tianqing Zhu, Lefeng Zhang, Ping Xiong
Abstract: With the extensive use of machine learning technologies, data providers encounter increasing privacy risks. Recent legislation, such as GDPR, obligates organizations to remove requested data and its influence from a trained model. Machine unlearning is an emerging technique designed to enable machine learning models to erase users' private information. Although several efficient machine unlearning schemes have been proposed, these methods still have limitations. First, removing the contributions of partial data may lead to model performance degradation. Second, discrepancies between the original and generated unlearned models can be exploited by attackers to obtain a target sample's information, resulting in additional privacy leakage risks. To address the above challenges, we propose a game-theoretic machine unlearning algorithm that simulates the competitive relationship between unlearning performance and privacy protection. This algorithm comprises unlearning and privacy modules. The unlearning module possesses a loss function composed of model distance and classification error, which is used to derive the optimal strategy. The privacy module aims to make it difficult for an attacker to infer membership information from the unlearned data, thereby reducing the privacy leakage risk during the unlearning process.
Additionally, experimental results on real-world datasets demonstrate this game-theoretic unlearning algorithm's effectiveness and its ability to generate an unlearned model with performance similar to that of the retrained one while mitigating extra privacy leakage risks.
Submitted 6 November, 2024; originally announced November 2024.
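
The unlearning module's loss described above combines a model-distance term with a classification-error term. A minimal PyTorch-style sketch of such a combined objective is given below; the fixed weight alpha, the choice of reference model, and the function names are assumptions of this sketch, since the paper balances the two terms game-theoretically rather than with a hand-set coefficient.

import torch
import torch.nn.functional as F

def unlearning_loss(unlearned_model, reference_model, retain_batch, alpha=1.0):
    # Classification error on retained data keeps the unlearned model useful.
    x, y = retain_batch
    ce = F.cross_entropy(unlearned_model(x), y)
    # Parameter-space distance keeps the unlearned model close to a reference
    # (e.g. a model retrained without the forgotten data).
    dist = sum(
        (p - q).pow(2).sum()
        for p, q in zip(unlearned_model.parameters(), reference_model.parameters())
    )
    return ce + alpha * dist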

arXiv:2411.03865 (https://arxiv.org/abs/2411.03865) [pdf, other] cs.MA cs.AI cs.GT cs.LG cs.SI
AdaSociety: An Adaptive Environment with Social Structures for Multi-Agent Decision-Making
Authors: Yizhe Huang, Xingbo Wang, Hao Liu, Fanqi Kong, Aoyang Qin, Min Tang, Xiaoxi Wang, Song-Chun Zhu, Mingjie Bi, Siyuan Qi, Xue Feng
Abstract: Traditional interactive environments limit agents' intelligence growth with fixed tasks. Recently, single-agent environments have addressed this by generating new tasks based on agent actions, enhancing task diversity. We consider the decision-making problem in multi-agent settings, where tasks are further influenced by social connections, affecting rewards and information access. However, existing multi-agent environments lack a combination of adaptive physical surroundings and social connections, hindering the learning of intelligent behaviors. To address this, we introduce AdaSociety, a customizable multi-agent environment featuring expanding state and action spaces, alongside explicit and alterable social structures. As agents progress, the environment adaptively generates new tasks with social structures for agents to undertake. In AdaSociety, we develop three mini-games showcasing distinct social structures and tasks. Initial results demonstrate that specific social structures can promote both individual and collective benefits, though current reinforcement learning and LLM-based algorithms show limited effectiveness in leveraging social structures to enhance performance. Overall, AdaSociety serves as a valuable research platform for exploring intelligence in diverse physical and social settings. The code is available at https://github.com/bigai-ai/AdaSociety.
Submitted 6 November, 2024; originally announced November 2024.
Comments: Accepted at NeurIPS D&B 2024

arXiv:2411.03747 (https://arxiv.org/abs/2411.03747) [pdf, other] cs.RO
Observability-Aware Control for Cooperatively Localizing Quadrotor UAVs
Authors: H S Helson Go, Ching Lok Chong, Longhao Qian, Hugh H. -T. Liu
Abstract: Cooperatively localizing robots should seek optimal control strategies to maximize the precision of position estimation and ensure safety in flight. Observability-Aware Trajectory Optimization has strong potential to address this issue, but no concrete link between observability and precision has been proven yet. In this paper, we prove that improvement in positioning precision inherently follows from optimizing observability. Based on this finding, we develop an Observability-Aware Control principle to generate observability-optimal control strategies. We implement this principle in a Model Predictive Control framework, and we verify it on a team of quadrotor Unmanned Aerial Vehicles comprising a follower vehicle that localizes itself by tracking a leader vehicle, in both simulations and real-world flight tests. Our results demonstrate that maximizing observability contributed to improving global positioning precision for the quadrotor team.
Submitted 6 November, 2024; originally announced November 2024.
Comments: 12 pages, 5 figures

arXiv:2411.03644 (https://arxiv.org/abs/2411.03644) [pdf, other] cs.CL cs.AI
Deploying Multi-task Online Server with Large Language Model
Authors: Yincen Qu, Chao Ma, Xiangying Dai, Hui Zhou, Yiting Wu, Hengyue Liu
Abstract: In the industry, numerous tasks are deployed online. Traditional approaches often tackle each task separately with its own network, which leads to excessive costs for developing and scaling models, especially in the context of large language models. Although multi-task methods can save costs through parameter sharing, they often struggle to outperform single-task methods in real-world applications. To tackle these challenges, we present a three-stage multi-task learning framework for large language models. It involves task filtering, followed by fine-tuning on high-resource tasks, and finally fine-tuning on all tasks. We conducted comprehensive experiments in single-task and multi-task settings. Our approach, evaluated on different benchmarks, achieves performance comparable to the single-task method while reducing up to 90.9% of its overhead.
Submitted 6 November, 2024; v1 submitted 5 November, 2024; originally announced November 2024.
Comments: Accepted by COLING 2025 Industry Track

arXiv:2411.03331 (https://arxiv.org/abs/2411.03331) [pdf, other] cs.SI cs.DM cs.DS cs.LG
Hypergraphs as Weighted Directed Self-Looped Graphs: Spectral Properties, Clustering, Cheeger Inequality
Authors: Zihao Li, Dongqi Fu, Hengyu Liu, Jingrui He
Abstract: Hypergraphs naturally arise when studying group relations and have been widely used in the field of machine learning. There has not been a unified formulation of hypergraphs, yet the recently proposed edge-dependent vertex weights (EDVW) modeling is one of the most generalized modeling methods of hypergraphs, i.e., most existing hypergraphs can be formulated as EDVW hypergraphs without any information loss to the best of our knowledge.
However, the relevant algorithmic developments on EDVW hypergraphs remain nascent: compared to spectral graph theory, the formulations are incomplete, the spectral clustering algorithms are not well-developed, and one result regarding the hypergraph Cheeger Inequality is even incorrect. To this end, deriving a unified random walk-based formulation, we propose our definitions of hypergraph Rayleigh Quotient, NCut, boundary/cut, volume, and conductance, which are consistent with the corresponding definitions on graphs. Then, we prove that the normalized hypergraph Laplacian is associated with the NCut value, which inspires our HyperClus-G algorithm for spectral clustering on EDVW hypergraphs. Finally, we prove that HyperClus-G can always find an approximately linearly optimal partitioning in terms of both NCut and conductance. Additionally, we provide extensive experiments to validate our theoretical findings from an empirical perspective.
Submitted 23 October, 2024; originally announced November 2024.
Comments: Preprint, 31 pages
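
For reference, the ordinary-graph quantities that these hypergraph definitions are said to be consistent with are the standard ones from spectral graph theory (the hypergraph generalizations themselves are given in the paper). For a weighted graph with adjacency matrix $W$, degree matrix $D$, and Laplacian $L = D - W$:

\[
\mathrm{vol}(S) = \sum_{v \in S} d_v, \qquad
\mathrm{cut}(S, \bar S) = \sum_{u \in S,\; v \in \bar S} w_{uv},
\]
\[
\mathrm{NCut}(S) = \mathrm{cut}(S, \bar S)\left(\frac{1}{\mathrm{vol}(S)} + \frac{1}{\mathrm{vol}(\bar S)}\right),
\qquad
R(x) = \frac{x^{\top} L x}{x^{\top} D x}.
\]

Minimizing the Rayleigh quotient $R(x)$ of the normalized Laplacian is the standard spectral relaxation of the NCut objective; this is the relationship the paper extends to EDVW hypergraphs via its random-walk formulation.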

arXiv:2411.03047 (https://arxiv.org/abs/2411.03047) [pdf, other] cs.CV cs.GR
GarVerseLOD: High-Fidelity 3D Garment Reconstruction from a Single In-the-Wild Image using a Dataset with Levels of Details
Authors: Zhongjin Luo, Haolin Liu, Chenghong Li, Wanghao Du, Zirong Jin, Wanhu Sun, Yinyu Nie, Weikai Chen, Xiaoguang Han
Abstract: Neural implicit functions have brought impressive advances to the state of the art in clothed human digitization from multiple or even single images. However, despite this progress, current methods still have difficulty generalizing to unseen images with complex cloth deformation and body poses. In this work, we present GarVerseLOD, a new dataset and framework that paves the way to achieving unprecedented robustness in high-fidelity 3D garment reconstruction from a single unconstrained image. Inspired by the recent success of large generative models, we believe that one key to addressing the generalization challenge lies in the quantity and quality of 3D garment data. Towards this end, GarVerseLOD collects 6,000 high-quality cloth models with fine-grained geometry details manually created by professional artists. In addition to the scale of training data, we observe that having disentangled granularities of geometry can play an important role in boosting the generalization capability and inference accuracy of the learned model. We hence craft GarVerseLOD as a hierarchical dataset with levels of details (LOD), spanning from detail-free stylized shapes to pose-blended garments with pixel-aligned details. This allows us to make this highly under-constrained problem tractable by factorizing the inference into easier tasks, each narrowed down to a smaller search space. To ensure GarVerseLOD can generalize well to in-the-wild images, we propose a novel labeling paradigm based on conditional diffusion models to generate extensive paired images for each garment model with high photorealism. We evaluate our method on a massive amount of in-the-wild images. Experimental results demonstrate that GarVerseLOD can generate standalone garment pieces with significantly better quality than prior approaches. Project page: https://garverselod.github.io/
Submitted 5 November, 2024; originally announced November 2024.
Comments: Project page: https://garverselod.github.io/

arXiv:2411.02457 (https://arxiv.org/abs/2411.02457) [pdf, other] cs.CL cs.AI
A Multi-Task Role-Playing Agent Capable of Imitating Character Linguistic Styles
Authors: Siyuan Chen, Qingyi Si, Chenxu Yang, Yunzhi Liang, Zheng Lin, Huan Liu, Weiping Wang
Abstract: The advent of large language models (LLMs) has significantly propelled the advancement of Role-Playing Agents (RPAs). However, current Role-Playing Agents predominantly focus on mimicking a character's fundamental attributes while neglecting the replication of linguistic style, and they are incapable of effectively replicating characters when performing tasks beyond multi-turn dialogues, which results in generated responses that lack authenticity. Current RPAs lack this capability because of the nature of existing character datasets, which lack collections of character quotations and are limited to multi-turn dialogue tasks, constraining the RPA's performance across other task domains and failing to capture a character's linguistic style. To address this gap, we developed a multi-task role-playing dataset named MRstyle, which encompasses a substantial number of real individuals along with their quotations and covers seven different tasks.
On this basis, we develop StyleRPA, a Multi-Task Role-Playing Agent (MRPA) that significantly outperforms recent open-source LLM and RPA baselines on 7 tasks including Dialogue, Dictionary, Composition, Story Generation, Product Description, Music Commentary, and Open Question Answering. The code and data will be released.
Submitted 3 November, 2024; originally announced November 2024.

arXiv:2411.02397 (https://arxiv.org/abs/2411.02397) [pdf, other] cs.CV
Adaptive Caching for Faster Video Generation with Diffusion Transformers
Authors: Kumara Kahatapitiya, Haozhe Liu, Sen He, Ding Liu, Menglin Jia, Chenyang Zhang, Michael S. Ryoo, Tian Xie
Abstract: Generating temporally-consistent high-fidelity videos can be computationally expensive, especially over longer temporal spans. More recent Diffusion Transformers (DiTs) -- despite making significant headway in this context -- have only heightened such challenges as they rely on larger models and heavier attention mechanisms, resulting in slower inference speeds.
In this paper, we introduce a training-free method to accelerate video DiTs, termed Adaptive Caching (AdaCache), which is motivated by the fact that "not all videos are created equal": meaning, some videos require fewer denoising steps to attain a reasonable quality than others. Building on this, we not only cache computations through the diffusion process, but also devise a caching schedule tailored to each video generation, maximizing the quality-latency trade-off. We further introduce a Motion Regularization (MoReg) scheme to utilize video information within AdaCache, essentially controlling the compute allocation based on motion content. Altogether, our plug-and-play contributions grant significant inference speedups (e.g. up to 4.7x on Open-Sora 720p - 2s video generation) without sacrificing the generation quality, across multiple video DiT baselines.
Submitted 7 November, 2024; v1 submitted 4 November, 2024; originally announced November 2024.
Comments: Project-page is available at https://adacache-dit.github.io
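
The core caching idea in this abstract (reuse computations across denoising steps when little has changed) can be illustrated with a toy snippet. The class name, the relative-change threshold, and the stand-in block are assumptions of this sketch; AdaCache's actual schedule is per-video and additionally motion-aware.

import numpy as np

class StepCache:
    # Toy cache for one block in a diffusion sampler: if the block's input has
    # barely changed since the last full computation, reuse the cached output.
    def __init__(self, threshold=0.05):
        self.threshold = threshold
        self.prev_input = None
        self.prev_output = None

    def __call__(self, block, x):
        if self.prev_input is not None:
            change = np.linalg.norm(x - self.prev_input) / (np.linalg.norm(self.prev_input) + 1e-8)
            if change < self.threshold:
                return self.prev_output  # skip the computation for this step
        out = block(x)                   # full computation
        self.prev_input, self.prev_output = x.copy(), out
        return out

# Toy usage with a placeholder block and a fake denoising loop.
cache = StepCache(threshold=0.05)
block = lambda v: np.tanh(v)             # stand-in for an expensive DiT block
x = np.random.randn(16, 64)
for _ in range(20):
    x = 0.99 * cache(block, x)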
arXiv:2411.02327 (cs.CV)
PPLLaVA: Varied Video Sequence Understanding With Prompt Guidance
Authors: Ruyang Liu, Haoran Tang, Haibo Liu, Yixiao Ge, Ying Shan, Chen Li, Jiankun Yang
Abstract: The past year has witnessed the significant advancement of video-based large language models. However, the challenge of developing a unified model for both short and long video understanding remains unresolved. Most existing video LLMs cannot handle hour-long videos, while methods tailored for long videos tend to be ineffective for shorter videos and images. In this paper, we identify the key issue as the redundant content in videos. To address this, we propose a novel pooling strategy that simultaneously achieves token compression and instruction-aware visual feature aggregation. Our model is termed Prompt-guided Pooling LLaVA, or PPLLaVA for short. Specifically, PPLLaVA consists of three core components: CLIP-based visual-prompt alignment that extracts visual information relevant to the user's instructions, prompt-guided pooling that compresses the visual sequence to arbitrary scales using convolution-style pooling, and clip context extension designed for the lengthy prompts common in visual dialogue. Moreover, our codebase also integrates the most advanced video Direct Preference Optimization (DPO) and visual interleave training. Extensive experiments have validated the performance of our model. With superior throughput and only 1024 tokens of visual context, PPLLaVA achieves better results on image benchmarks as a video LLM, while achieving state-of-the-art performance across various video benchmarks, excelling in tasks ranging from caption generation to multiple-choice questions, and handling video lengths from seconds to hours. Code is available at https://github.com/farewellthree/PPLLaVA.
Submitted 5 November, 2024; v1 submitted 4 November, 2024; originally announced November 2024.
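Illustrative sketch (an assumption, not PPLLaVA's released implementation): prompt-guided pooling can be pictured as weighting visual tokens by their similarity to the text prompt and then compressing the sequence with strided (convolution-style) average pooling to a fixed token budget. The function name prompt_guided_pool and the softmax weighting are illustrative choices.

    # Hedged sketch: instruction-aware weighting followed by convolution-style pooling.
    import torch
    import torch.nn.functional as F

    def prompt_guided_pool(visual_tokens, prompt_embedding, target_len=1024):
        """visual_tokens: (N, D); prompt_embedding: (D,). Returns (target_len, D)."""
        scores = F.softmax(visual_tokens @ prompt_embedding / visual_tokens.shape[-1] ** 0.5, dim=0)
        weighted = visual_tokens * scores.unsqueeze(-1)      # emphasize prompt-relevant tokens
        x = weighted.T.unsqueeze(0)                          # (1, D, N)
        pooled = F.adaptive_avg_pool1d(x, target_len)        # strided average pooling over tokens
        return pooled.squeeze(0).T                           # (target_len, D)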
arXiv:2411.02265 (cs.CL, cs.AI)
Hunyuan-Large: An Open-Source MoE Model with 52 Billion Activated Parameters by Tencent
Authors: Xingwu Sun, Yanfeng Chen, Yiqing Huang, Ruobing Xie, Jiaqi Zhu, Kai Zhang, Shuaipeng Li, Zhen Yang, Jonny Han, Xiaobo Shu, Jiahao Bu, Zhongzhi Chen, Xuemeng Huang, Fengzong Lian, Saiyong Yang, Jianfeng Yan, Yuyuan Zeng, Xiaoqin Ren, Chao Yu, Lulu Wu, Yue Mao, Jun Xia, Tao Yang, Suncong Zheng, Kan Wu, et al. (83 additional authors not shown)
Abstract: In this paper, we introduce Hunyuan-Large, which is currently the largest open-source Transformer-based mixture-of-experts model, with a total of 389 billion parameters and 52 billion activated parameters, capable of handling up to 256K tokens. We conduct a thorough evaluation of Hunyuan-Large's superior performance across various benchmarks including language understanding and generation, logical reasoning, mathematical problem-solving, coding, long-context, and aggregated tasks, where it outperforms LLama3.1-70B and exhibits comparable performance to the significantly larger LLama3.1-405B model. Key practices of Hunyuan-Large include large-scale synthetic data that is orders of magnitude larger than in previous literature, a mixed expert routing strategy, a key-value cache compression technique, and an expert-specific learning rate strategy. Additionally, we investigate the scaling laws and learning rate schedule of mixture-of-experts models, providing valuable insights and guidance for future model development and optimization. The code and checkpoints of Hunyuan-Large are released to facilitate future innovations and applications. Code: https://github.com/Tencent/Hunyuan-Large Models: https://huggingface.co/tencent/Tencent-Hunyuan-Large
Submitted 6 November, 2024; v1 submitted 4 November, 2024; originally announced November 2024.
Comments: 17 pages, 4 Figures
arXiv:2411.02003 (cs.LG, cs.AI, cs.DC, cs.SI)
Against Multifaceted Graph Heterogeneity via Asymmetric Federated Prompt Learning
Authors: Zhuoning Guo, Ruiqian Han, Hao Liu
Abstract: Federated Graph Learning (FGL) aims to collaboratively and privately optimize graph models on divergent data for different tasks. A critical challenge in FGL is to enable effective yet efficient federated optimization against multifaceted graph heterogeneity to enhance mutual performance. However, existing FGL works primarily address graph data heterogeneity and cannot handle graph task heterogeneity. To address this challenge, we propose a Federated Graph Prompt Learning (FedGPL) framework to efficiently enable prompt-based asymmetric graph knowledge transfer between multifaceted heterogeneous federated participants. Generally, we establish a split federated framework to preserve universal and domain-specific graph knowledge, respectively. Moreover, we develop two algorithms to eliminate task and data heterogeneity for advanced federated knowledge preservation. First, a Hierarchical Directed Transfer Aggregator (HiDTA) delivers cross-task beneficial knowledge that is hierarchically distilled according to the directional transferability. Second, a Virtual Prompt Graph (VPG) adaptively generates graph structures to enhance data utility by distinguishing dominant subgraphs and neutralizing redundant ones. We conduct theoretical analyses and extensive experiments to demonstrate the significant accuracy and efficiency gains of FedGPL against multifaceted graph heterogeneity compared to state-of-the-art baselines on large-scale federated graph datasets.
Submitted 4 November, 2024; originally announced November 2024.

arXiv:2411.01906 (cs.NI)
Connection Performance Modeling and Analysis of a Radiosonde Network in a Typhoon
Authors: Hanyi Liu, Xianbin Cao, Peng Yang, Zehui Xiong, Tony Q. S. Quek, Dapeng Oliver Wu
Abstract: This paper is concerned with the theoretical modeling and analysis of the uplink connection performance of a radiosonde network deployed in a typhoon. Similar to existing works, stochastic geometry theory is leveraged to derive the expression of the uplink connection probability (CP) of a radiosonde. Nevertheless, existing works assume that network nodes are spherically or uniformly distributed. Different from these works, this paper investigates two particular motion patterns of radiosondes in a typhoon, which significantly challenges the theoretical analysis. According to their particular motion patterns, this paper first separately models the distributions of horizontal and vertical distances from a radiosonde to its receiver. Secondly, it derives the closed-form expressions of the cumulative distribution function (CDF) and probability density function (PDF) of a radiosonde's three-dimensional (3D) propagation distance to its receiver. Thirdly, it derives the analytical expression of the uplink CP for any radiosonde in the network. Finally, extensive numerical simulations are conducted to validate the theoretical analysis, and the influence of various network design parameters is comprehensively discussed. Simulation results show that when the signal-to-interference-plus-noise ratio (SINR) threshold is below -35 dB and the density of radiosondes remains under 0.01/km^3, the uplink CP approaches 26%, 39%, and 50% in three patterns.
Submitted 10 November, 2024; v1 submitted 4 November, 2024; originally announced November 2024.
arXiv:2411.01748 (cs.CV)
Rotation Perturbation Robustness in Point Cloud Analysis: A Perspective of Manifold Distillation
Authors: Xinyu Xu, Huazhen Liu, Feiming Wei, Huilin Xiong, Wenxian Yu, Tao Zhang
Abstract: A point cloud is often regarded as a discrete sampling of a Riemannian manifold and plays a pivotal role in 3D image interpretation. In particular, rotation perturbation, an unexpected small change in rotation caused by various factors (such as equipment offset, system instability, and measurement errors), can easily lead to inferior results in point cloud learning tasks. However, classical point cloud learning methods are sensitive to rotation perturbation, and the existing networks with rotation robustness also have much room for improvement in terms of performance and noise tolerance. Given this, this paper remodels the point cloud from the perspective of manifolds and designs a manifold distillation method to achieve robustness to rotation perturbation without any coordinate transformation. In brief, during the training phase, we introduce a teacher network to learn the rotation robustness information and transfer this information to the student network through online distillation. In the inference phase, the student network directly utilizes the original 3D coordinate information to achieve robustness to rotation perturbation. Experiments carried out on four different datasets verify the effectiveness of our method. On average, on the ModelNet40 and ScanObjectNN classification datasets with random rotation perturbations, our classification accuracy improves by 4.92% and 4.41%, respectively, compared to popular rotation-robust networks; on the ShapeNet and S3DIS segmentation datasets, compared to the rotation-robust networks, the improvements in mIoU are 7.36% and 4.82%, respectively. Besides, the proposed algorithm also shows excellent performance in resisting noise and outliers.
Submitted 3 November, 2024; originally announced November 2024.
Comments: 13 pages, 8 figures, submitted to TCSVT
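Illustrative sketch (an assumption, not the paper's training recipe): the teacher-student online distillation mentioned above can be pictured in its generic form, where a teacher that sees a rotation-normalized view supervises a student that only sees raw 3D coordinates. The normalize_rotation hook, the temperature, and the loss weighting below are illustrative.

    # Hedged sketch: generic online knowledge distillation for rotation robustness.
    import torch
    import torch.nn.functional as F

    def distillation_step(teacher, student, points, labels, normalize_rotation, T=4.0, alpha=0.5):
        with torch.no_grad():
            teacher_logits = teacher(normalize_rotation(points))   # rotation-robust view
        student_logits = student(points)                            # raw 3D coordinates only
        task_loss = F.cross_entropy(student_logits, labels)
        kd_loss = F.kl_div(F.log_softmax(student_logits / T, dim=-1),
                           F.softmax(teacher_logits / T, dim=-1),
                           reduction="batchmean") * (T * T)
        return alpha * task_loss + (1 - alpha) * kd_loss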
arXiv:2411.01624 (cs.CV)
PreCM: The Padding-based Rotation Equivariant Convolution Mode for Semantic Segmentation
Authors: Xinyu Xu, Huazhen Liu, Huilin Xiong, Wenxian Yu, Tao Zhang
Abstract: Semantic segmentation is an important branch of image processing and computer vision. With the popularity of deep learning, various deep semantic segmentation networks have been proposed for pixel-level classification and segmentation tasks. However, imaging angles are often arbitrary in the real world, as with water body images in remote sensing and capillary and polyp images in the medical field, and we usually cannot obtain prior orientation information to guide these networks to extract more effective features. Additionally, learning the features of objects with multiple orientations is also challenging, as most CNN-based semantic segmentation networks do not have rotation equivariance to resist the disturbance from orientation information. To address these issues, in this paper we first establish a universal convolution-group framework to more fully utilize the orientation information and make the networks rotation equivariant. Then, we mathematically construct the padding-based rotation equivariant convolution mode (PreCM), which can be used not only for multi-scale images and convolution kernels, but also as a replacement component for multiple convolutions, like dilated convolution, transposed convolution, and variable-stride convolution. In order to verify the realization of rotation equivariance, a new evaluation metric named rotation difference (RD) is finally proposed. The experiments carried out on the datasets Satellite Images of Water Bodies, DRIVE, and FloodNet show that the PreCM-based networks can achieve better segmentation performance than the original and data-augmentation-based networks. In terms of the average RD value, the former is 0% and the latter two are 7.0503% and 3.2606%, respectively. Last but not least, PreCM also effectively enhances the robustness of networks to rotation perturbations.
Submitted 3 November, 2024; originally announced November 2024.
Comments: 14 pages, 14 figures, submitted to TIP
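Illustrative sketch (an assumption; the exact RD definition is given in the paper): one natural way to quantify rotation equivariance in the spirit of a "rotation difference" is to compare the prediction on a rotated image against the rotated prediction of the original image, with 0% indicating perfect equivariance. The function name rotation_difference and the restriction to 90-degree rotations are illustrative.

    # Hedged sketch: disagreement between predict-then-rotate and rotate-then-predict.
    import torch

    def rotation_difference(model, image, k=1):
        """image: (1, C, H, W) tensor; k: number of 90-degree rotations."""
        pred_then_rotate = torch.rot90(model(image).argmax(1), k, dims=(-2, -1))
        rotate_then_pred = model(torch.rot90(image, k, dims=(-2, -1))).argmax(1)
        return (pred_then_rotate != rotate_then_pred).float().mean().item() * 100.0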
arXiv:2411.01575 (eess.IV, cs.CV)
HC$^3$L-Diff: Hybrid conditional latent diffusion with high frequency enhancement for CBCT-to-CT synthesis
Authors: Shi Yin, Hongqi Tan, Li Ming Chong, Haofeng Liu, Hui Liu, Kang Hao Lee, Jeffrey Kit Loong Tuan, Dean Ho, Yueming Jin
Abstract: Background: Cone-beam computed tomography (CBCT) plays a crucial role in image-guided radiotherapy, but artifacts and noise make CBCT images unsuitable for accurate dose calculation. Artificial intelligence methods have shown promise in enhancing CBCT quality to produce synthetic CT (sCT) images. However, existing methods either produce images of suboptimal quality or incur excessive time costs, failing to satisfy clinical practice standards. Methods and materials: We propose a novel hybrid conditional latent diffusion model for efficient and accurate CBCT-to-CT synthesis, named HC$^3$L-Diff. We employ a Unified Feature Encoder (UFE) to compress images into a low-dimensional latent space, thereby optimizing computational efficiency. Beyond the use of CBCT images, we propose integrating their high-frequency knowledge as a hybrid condition to guide the diffusion model in generating sCT images with preserved structural details. This high-frequency information is captured using our designed High-Frequency Extractor (HFE). During inference, we utilize a denoising diffusion implicit model to facilitate rapid sampling. We construct a new in-house prostate dataset with paired CBCT and CT to validate the effectiveness of our method. Result: Extensive experimental results demonstrate that our approach outperforms state-of-the-art methods in terms of sCT quality and generation efficiency. Moreover, our medical physicist conducted dosimetric evaluations to validate the benefit of our method in practical dose calculation, achieving a remarkable 93.8% gamma passing rate with a 2%/2mm criterion, superior to other methods. Conclusion: The proposed HC$^3$L-Diff can efficiently achieve high-quality CBCT-to-CT synthesis in just over 2 minutes per patient. Its promising performance in dose calculation shows great potential for enhancing real-world adaptive radiotherapy.
Submitted 3 November, 2024; originally announced November 2024.
Comments: 13 pages, 5 figures

arXiv:2411.01033 (cs.SE)
Many-Objective Search-Based Coverage-Guided Automatic Test Generation for Deep Neural Networks
Authors: Dongcheng Li, W. Eric Wong, Hu Liu, Man Zhao
Abstract: To ensure the reliability of DNN systems and address the test generation problem for neural networks, this paper proposes a fuzzing test generation technique based on many-objective optimization algorithms. Traditional fuzz testing employs random search, which leads to lower testing efficiency and tends to generate numerous invalid test cases. By utilizing many-objective optimization techniques, effective test cases can be generated. To achieve high test coverage, this paper proposes several improvement strategies. The frequency-based fuzz sampling strategy assigns priorities based on how often initial data have been selected, avoiding repetitive selection of the same data and enhancing the quality of initial data better than random sampling strategies. To address the issue that global search may yield tests that do not satisfy semantic constraints, a local search strategy based on Monte Carlo tree search is proposed to enhance the algorithm's local search capabilities. Furthermore, we improve the diversity of the population and the algorithm's global search capability by updating SPEA2's external archive based on a decomposition-based archiving strategy. To validate the effectiveness of the proposed approach, experiments were conducted on several public datasets and various neural network models. The results reveal that, compared to random and clustering-based sampling, the frequency-based fuzz sampling strategy provides a greater improvement in coverage rate in the later stages of iterations. On complex networks like VGG16, the improved SPEA2 algorithm increased the coverage rate by about 12% across several coverage metrics, and by approximately 40% on LeNet series networks. The experimental results also indicate that the newly generated test cases not only exhibit higher coverage rates but also generate adversarial samples that reveal model errors.
Submitted 1 November, 2024; originally announced November 2024.
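Illustrative sketch (an assumption, not the paper's exact priority formula): frequency-based seed sampling at its simplest means preferring seeds that have been picked less often, so the fuzzer does not keep mutating the same inputs. The 1/(1 + count) weighting and the FrequencySampler class below are illustrative choices.

    # Hedged sketch: weighted seed selection that discourages repeated picks.
    import random

    class FrequencySampler:
        def __init__(self, seeds):
            self.seeds = list(seeds)
            self.counts = [0] * len(self.seeds)

        def next_seed(self):
            weights = [1.0 / (1 + c) for c in self.counts]   # less-used seeds get higher priority
            idx = random.choices(range(len(self.seeds)), weights=weights, k=1)[0]
            self.counts[idx] += 1
            return self.seeds[idx]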
arXiv:2411.00850 (cs.LG, cs.AI, cs.CL)
GWQ: Gradient-Aware Weight Quantization for Large Language Models
Authors: Yihua Shao, Siyu Liang, Xiaolin Lin, Zijian Ling, Zixian Zhu, Minxi Yan, Haiyang Liu, Siyu Chen, Ziyang Yan, Yilan Meng, Chenyu Zhang, Haotong Qin, Michele Magno, Yang Yang, Zhen Lei, Yan Wang, Jingcai Guo, Ling Shao, Hao Tang
Abstract: Large language models (LLMs) show impressive performance in solving complex language tasks. However, their large number of parameters presents significant challenges for deploying and applying such models on edge devices. Compressing large language models to low bit-widths can enable them to run on resource-constrained devices, but often leads to performance degradation. To address this problem, we propose gradient-aware weight quantization (GWQ), the first quantization approach for low-bit weight quantization that leverages gradients to localize outliers, requiring only a minimal amount of calibration data for outlier detection. GWQ preferentially retains the weights corresponding to the top 1% of outliers at FP16 precision, while the remaining non-outlier weights are stored in a low-bit format. Experimentally, we found that localizing sensitive weights via gradients is more effective than localizing them via the Hessian matrix. Compared to current quantization methods, GWQ can be applied to multiple language models and achieves lower perplexity (PPL) on the WikiText2 and C4 datasets. On zero-shot tasks, GWQ-quantized models have higher accuracy than models quantized with other methods. GWQ is also suitable for multimodal model quantization, and the quantized Qwen-VL family models are more accurate than those produced by other methods; on the zero-shot target detection dataset RefCOCO, GWQ outperforms the current state-of-the-art method SPQR. GWQ achieves a 1.2x inference speedup in comparison to the original model and effectively reduces inference memory.
Submitted 30 October, 2024; originally announced November 2024.
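Illustrative sketch (an assumption, not the released GWQ code): the mixed-precision idea the abstract describes can be pictured per tensor as using gradients from a small calibration batch to pick the top-1% "outlier" weights, keeping those at FP16, and rounding the rest to a low-bit grid. The function gwq_like_quantize and the symmetric per-tensor rounding below are simplifications.

    # Hedged sketch: gradient-localized outliers kept in FP16, the rest quantized to low bits.
    import torch

    def gwq_like_quantize(weight, grad, keep_ratio=0.01, bits=4):
        """weight, grad: same-shaped tensors; returns a dequantized mixed-precision weight."""
        k = max(1, int(keep_ratio * weight.numel()))
        outlier_idx = grad.abs().flatten().topk(k).indices            # gradient-localized outliers
        scale = weight.abs().max() / (2 ** (bits - 1) - 1)            # symmetric per-tensor scale
        quantized = torch.clamp((weight / scale).round(),
                                -2 ** (bits - 1), 2 ** (bits - 1) - 1) * scale
        mixed = quantized.flatten().clone()
        mixed[outlier_idx] = weight.flatten()[outlier_idx].half().float()  # keep outliers at FP16
        return mixed.view_as(weight)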
arXiv:2411.00401 (cs.LG, cs.AI)
Statistical Guarantees for Lifelong Reinforcement Learning using PAC-Bayesian Theory
Authors: Zhi Zhang, Chris Chow, Yasi Zhang, Yanchao Sun, Haochen Zhang, Eric Hanchen Jiang, Han Liu, Furong Huang, Yuchen Cui, Oscar Hernan Madrid Padilla
Abstract: Lifelong reinforcement learning (RL) has been developed as a paradigm for extending single-task RL to more realistic, dynamic settings. In lifelong RL, the "life" of an RL agent is modeled as a stream of tasks drawn from a task distribution. We propose EPIC (Empirical PAC-Bayes that Improves Continuously), a novel algorithm designed for lifelong RL using PAC-Bayes theory. EPIC learns a shared policy distribution, referred to as the world policy, which enables rapid adaptation to new tasks while retaining valuable knowledge from previous experiences. Our theoretical analysis establishes a relationship between the algorithm's generalization performance and the number of prior tasks preserved in memory. We also derive the sample complexity of EPIC in terms of RL regret. Extensive experiments on a variety of environments demonstrate that EPIC significantly outperforms existing methods in lifelong RL, offering both theoretical guarantees and practical efficacy through the use of the world policy.
Submitted 1 November, 2024; originally announced November 2024.

arXiv:2411.00040 (math.NA, cs.AI, cs.LG)
P$^2$C$^2$Net: PDE-Preserved Coarse Correction Network for efficient prediction of spatiotemporal dynamics
Authors: Qi Wang, Pu Ren, Hao Zhou, Xin-Yang Liu, Zhiwen Deng, Yi Zhang, Ruizhi Chengze, Hongsheng Liu, Zidong Wang, Jian-Xun Wang, Ji-Rong Wen, Hao Sun, Yang Liu
Abstract: When solving partial differential equations (PDEs), classical numerical methods often require fine mesh grids and small time steps to meet stability, consistency, and convergence conditions, leading to high computational cost. Recently, machine learning has been increasingly utilized to solve PDE problems, but these approaches often encounter challenges related to interpretability, generalizability, and strong dependency on rich labeled data. Hence, we introduce a new PDE-Preserved Coarse Correction Network (P$^2$C$^2$Net) to efficiently solve spatiotemporal PDE problems on coarse mesh grids in small data regimes. The model consists of two synergistic modules: (1) a trainable PDE block that learns to update the coarse solution (i.e., the system state) based on a high-order numerical scheme with boundary condition encoding, and (2) a neural network block that consistently corrects the solution on the fly. In particular, we propose a learnable symmetric convolution filter, with weights shared over the entire model, to accurately estimate the spatial derivatives of the PDE based on the neural-corrected system state. The resulting physics-encoded model is capable of handling limited training data (e.g., 3-5 trajectories) and accelerates the prediction of PDE solutions on coarse spatiotemporal grids while maintaining high accuracy. P$^2$C$^2$Net achieves consistent state-of-the-art performance with over 50% gain (e.g., in terms of relative prediction error) across four datasets covering complex reaction-diffusion processes and turbulent flows.
Submitted 29 October, 2024; originally announced November 2024.
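Illustrative sketch (an assumption, not the paper's filter design): a learnable convolution filter with a symmetry constraint can be pictured as a small kernel forced to be antisymmetric along x, which is the structure of a first-derivative stencil. The class name LearnableDxFilter, the 1x3 kernel size, and the replicate padding are illustrative choices.

    # Hedged sketch: a learnable, structurally antisymmetric first-derivative filter.
    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    class LearnableDxFilter(nn.Module):
        def __init__(self, dx=1.0):
            super().__init__()
            self.half = nn.Parameter(torch.tensor([0.5]))   # learnable half-stencil weight
            self.dx = dx

        def forward(self, u):                                # u: (B, 1, H, W) field values
            # Antisymmetric 1x3 kernel [-w, 0, +w]: constrained shape, learnable magnitude.
            kernel = torch.stack([-self.half, torch.zeros_like(self.half), self.half]).view(1, 1, 1, 3)
            return F.conv2d(F.pad(u, (1, 1, 0, 0), mode="replicate"), kernel) / self.dx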
arXiv:2410.23866 (cs.SE)
Evaluating and Improving ChatGPT-Based Expansion of Abbreviations
Authors: Yanjie Jiang, Hui Liu, Lu Zhang
Abstract: Source code identifiers often contain abbreviations. Such abbreviations may reduce the readability of the source code, which in turn hinders the maintenance of software applications. To this end, accurate and automated approaches to expanding abbreviations in source code are desirable, and abbreviation expansion has been intensively investigated. However, to the best of our knowledge, most existing approaches are heuristics, and none of them has employed deep learning techniques, let alone the most advanced large language models (LLMs). LLMs have demonstrated cutting-edge performance in various software engineering tasks, and thus they have the potential to expand abbreviations automatically. To this end, in this paper, we present the first empirical study on LLM-based abbreviation expansion. Our evaluation results on a public benchmark suggest that ChatGPT is substantially less accurate than the state-of-the-art approach, reducing precision and recall by 28.2% and 27.8%, respectively. We manually analyzed the failed cases and discovered the root causes of the failures: 1) lack of context and 2) inability to recognize abbreviations. In response to the first cause, we investigated the effect of various contexts and found that surrounding source code is the best selection. In response to the second cause, we designed an iterative approach that identifies and explicitly marks missed abbreviations in prompts. Finally, we proposed post-condition checking to exclude incorrect expansions that violate common sense. All these measures together make ChatGPT-based abbreviation expansion comparable to the state of the art while avoiding the expensive source code parsing and deep analysis that are indispensable for state-of-the-art approaches.
Submitted 31 October, 2024; originally announced October 2024.
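Illustrative sketch (an assumption; the paper's actual post-conditions may differ): one plausible common-sense check is to accept an expansion only if the abbreviation's characters appear in the expansion in order. The function name expansion_is_plausible and the subsequence rule are illustrative.

    # Hedged sketch: reject expansions whose letters cannot be aligned with the abbreviation.
    def expansion_is_plausible(abbreviation: str, expansion: str) -> bool:
        it = iter(expansion.lower())
        # "ch in it" advances the iterator, so this checks for an in-order subsequence.
        return all(ch in it for ch in abbreviation.lower())

    # Example: expansion_is_plausible("idx", "index") -> True
    #          expansion_is_plausible("idx", "identifier") -> False (no 'x' after 'i', 'd')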