Search | arXiv e-print repository
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 525 results for author: <span class="mathjax">Xia, L</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Xia%2C+L">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Xia, L"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Xia%2C+L&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Xia, L"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Xia%2C+L&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Xia%2C+L&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Xia%2C+L&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Xia%2C+L&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Xia%2C+L&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Xia%2C+L&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.16308">arXiv:2411.16308</a> <span> [<a href="https://arxiv.org/pdf/2411.16308">pdf</a>, <a href="https://arxiv.org/format/2411.16308">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> An End-to-End Robust Point Cloud Semantic Segmentation Network with Single-Step Conditional Diffusion Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qu%2C+W">Wentao Qu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jing Wang</a>, <a href="/search/cs?searchtype=author&query=Gong%2C+Y">YongShun Gong</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+X">Xiaoshui Huang</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+L">Liang Xiao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.16308v2-abstract-short" style="display: inline;"> Existing conditional Denoising Diffusion Probabilistic Models (DDPMs) with a Noise-Conditional Framework (NCF) remain challenging for 3D scene understanding tasks, as the complex geometric details in scenes increase the difficulty of fitting the gradients of the data distribution (the scores) from semantic labels. 
   Abstract: Existing conditional Denoising Diffusion Probabilistic Models (DDPMs) with a Noise-Conditional Framework (NCF) remain challenging for 3D scene understanding tasks, as the complex geometric details in scenes increase the difficulty of fitting the gradients of the data distribution (the scores) from semantic labels. This also results in longer training and inference time for DDPMs compared to non-DDPMs. From a different perspective, we delve deeply into the model paradigm dominated by the Conditional Network. In this paper, we propose an end-to-end robust semantic Segmentation Network based on a Conditional-Noise Framework (CNF) of DDPMs, named CDSegNet. Specifically, CDSegNet models the Noise Network (NN) as a learnable noise-feature generator. This enables the Conditional Network (CN) to understand 3D scene semantics under multi-level feature perturbations, enhancing the generalization in unseen scenes. Meanwhile, benefiting from the noise system of DDPMs, CDSegNet exhibits strong noise and sparsity robustness in experiments. Moreover, thanks to CNF, CDSegNet can generate the semantic labels in a single-step inference like non-DDPMs, due to avoiding directly fitting the scores from semantic labels in the dominant network of CDSegNet. On public indoor and outdoor benchmarks, CDSegNet significantly outperforms existing methods, achieving state-of-the-art performance.
   Submitted 26 November, 2024; v1 submitted 25 November, 2024; originally announced November 2024.
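
The abstract above describes a Conditional-Noise Framework in which the Noise Network acts as a learnable noise-feature generator and the Conditional Network predicts labels in a single step. Below is a minimal sketch of that data flow, assuming simple MLP stand-ins and made-up tensor shapes; it illustrates the stated idea, not the authors' architecture.

```python
import torch
import torch.nn as nn

class ToyCNFSegmenter(nn.Module):
    """Toy conditional-noise segmenter: all modules and shapes are assumed."""

    def __init__(self, in_dim=6, feat_dim=64, num_classes=13):
        super().__init__()
        # Noise Network: generates feature-level perturbations.
        self.noise_net = nn.Sequential(
            nn.Linear(in_dim, feat_dim), nn.ReLU(), nn.Linear(feat_dim, feat_dim))
        # Conditional Network: the dominant branch that predicts semantics.
        self.cond_net = nn.Sequential(
            nn.Linear(in_dim, feat_dim), nn.ReLU(), nn.Linear(feat_dim, feat_dim))
        self.head = nn.Linear(feat_dim, num_classes)

    def forward(self, points):
        perturbation = self.noise_net(points)             # learnable "noise" features
        features = self.cond_net(points) + perturbation   # CN sees perturbed features
        return self.head(features)                        # one-step per-point logits

logits = ToyCNFSegmenter()(torch.randn(1024, 6))  # 1024 points with xyz + rgb
```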
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.13789">arXiv:2411.13789</a> <span> [<a href="https://arxiv.org/pdf/2411.13789">pdf</a>, <a href="https://arxiv.org/format/2411.13789">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> LEADRE: Multi-Faceted Knowledge Enhanced LLM Empowered Display Advertisement Recommender System </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+F">Fengxin Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yi Li</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yue Liu</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+C">Chao Zhou</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yuan Wang</a>, <a href="/search/cs?searchtype=author&query=Deng%2C+X">Xiaoxiang Deng</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+W">Wei Xue</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+D">Dapeng Liu</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+L">Lei Xiao</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+H">Haijie Gu</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+J">Jie Jiang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+H">Hongyan Liu</a>, <a href="/search/cs?searchtype=author&query=Qin%2C+B">Biao Qin</a>, <a href="/search/cs?searchtype=author&query=He%2C+J">Jun He</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.13789v2-abstract-short" style="display: inline;"> Display advertising provides significant value to advertisers, publishers, and users. Traditional display advertising systems utilize a multi-stage architecture consisting of retrieval, coarse ranking, and final ranking. However, conventional retrieval methods rely on ID-based learning to rank mechanisms and fail to adequately utilize the content information of ads, which hampers their ability to… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.13789v2-abstract-full').style.display = 'inline'; document.getElementById('2411.13789v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.13789v2-abstract-full" style="display: none;"> Display advertising provides significant value to advertisers, publishers, and users. Traditional display advertising systems utilize a multi-stage architecture consisting of retrieval, coarse ranking, and final ranking. However, conventional retrieval methods rely on ID-based learning to rank mechanisms and fail to adequately utilize the content information of ads, which hampers their ability to provide diverse recommendation lists. To address this limitation, we propose leveraging the extensive world knowledge of LLMs. However, three key challenges arise when attempting to maximize the effectiveness of LLMs: "How to capture user interests", "How to bridge the knowledge gap between LLMs and advertising system", and "How to efficiently deploy LLMs". To overcome these challenges, we introduce a novel LLM-based framework called LLM Empowered Display ADvertisement REcommender system (LEADRE). 
   LEADRE consists of three core modules: (1) The Intent-Aware Prompt Engineering introduces multi-faceted knowledge and designs intent-aware <Prompt, Response> pairs that fine-tune LLMs to generate ads tailored to users' personal interests. (2) The Advertising-Specific Knowledge Alignment incorporates auxiliary fine-tuning tasks and Direct Preference Optimization (DPO) to align LLMs with ad semantics and business value. (3) The Efficient System Deployment deploys LEADRE in an online environment by integrating both latency-tolerant and latency-sensitive services. Extensive offline experiments demonstrate the effectiveness of LEADRE and validate the contributions of individual modules. An online A/B test shows that LEADRE leads to a 1.57% and 1.17% GMV lift for serviced users on WeChat Channels and Moments, respectively. LEADRE has been deployed on both platforms, serving tens of billions of requests each day.
   Submitted 25 November, 2024; v1 submitted 20 November, 2024; originally announced November 2024.

3. arXiv:2411.12441 [pdf, other] cs.IR
   Towards Unifying Feature Interaction Models for Click-Through Rate Prediction
   Authors: Yu Kang, Junwei Pan, Jipeng Jin, Shudong Huang, Xiaofeng Gao, Lei Xiao
   Abstract: Modeling feature interactions plays a crucial role in accurately predicting click-through rates (CTR) in advertising systems. To capture the intricate patterns of interaction, many existing models employ matrix-factorization techniques to represent features as lower-dimensional embedding vectors, enabling the modeling of interactions as products between these embeddings. In this paper, we propose a general framework called IPA to systematically unify these models. Our framework comprises three key components: the Interaction Function, which facilitates feature interaction; the Layer Pooling, which constructs higher-level interaction layers; and the Layer Aggregator, which combines the outputs of all layers to serve as input for the subsequent classifier. We demonstrate that most existing models can be categorized within our framework by making specific choices for these three components. Through extensive experiments and a dimensional collapse analysis, we evaluate the performance of these choices. Furthermore, by leveraging the most powerful components within our framework, we introduce a novel model that achieves competitive results compared to state-of-the-art CTR models. PFL achieves a significant GMV lift in online A/B tests on Tencent's advertising platform and has been deployed as the production model in several primary scenarios.
   Submitted 19 November, 2024; originally announced November 2024.
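
The three IPA components named in the abstract (Interaction Function, Layer Pooling, Layer Aggregator) can be sketched compactly. The element-wise product interaction, sum-style pooling, and concatenation aggregator below are illustrative choices under assumed shapes, not the specific configuration the paper evaluates.

```python
import torch

def interaction(a, b):
    # One possible Interaction Function: the element-wise (Hadamard) product
    # of two feature embeddings, as in FM-style models.
    return a * b

def layer_pooling(prev_layer, base_embeddings):
    # One possible Layer Pooling: interact every vector of the previous layer
    # with every base embedding, summing over the base features.
    return torch.stack([
        sum(interaction(p, b) for b in base_embeddings)
        for p in prev_layer
    ])

def layer_aggregator(layers):
    # One possible Layer Aggregator: concatenate all layers' outputs as the
    # input to the downstream classifier.
    return torch.cat([layer.flatten() for layer in layers])

# Toy usage: 3 feature fields with 4-dimensional embeddings, 2 interaction layers.
base = torch.randn(3, 4)
layer1 = layer_pooling(base, base)
layer2 = layer_pooling(layer1, base)
classifier_input = layer_aggregator([base, layer1, layer2])  # shape: (36,)
```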
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.12135">arXiv:2411.12135</a> <span> [<a href="https://arxiv.org/pdf/2411.12135">pdf</a>, <a href="https://arxiv.org/format/2411.12135">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Exact Risk Curves of signSGD in High-Dimensions: Quantifying Preconditioning and Noise-Compression Effects </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xiao%2C+K+L">Ke Liang Xiao</a>, <a href="/search/cs?searchtype=author&query=Marshall%2C+N">Noah Marshall</a>, <a href="/search/cs?searchtype=author&query=Agarwala%2C+A">Atish Agarwala</a>, <a href="/search/cs?searchtype=author&query=Paquette%2C+E">Elliot Paquette</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.12135v1-abstract-short" style="display: inline;"> In recent years, signSGD has garnered interest as both a practical optimizer as well as a simple model to understand adaptive optimizers like Adam. Though there is a general consensus that signSGD acts to precondition optimization and reshapes noise, quantitatively understanding these effects in theoretically solvable settings remains difficult. We present an analysis of signSGD in a high dimensio… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12135v1-abstract-full').style.display = 'inline'; document.getElementById('2411.12135v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.12135v1-abstract-full" style="display: none;"> In recent years, signSGD has garnered interest as both a practical optimizer as well as a simple model to understand adaptive optimizers like Adam. Though there is a general consensus that signSGD acts to precondition optimization and reshapes noise, quantitatively understanding these effects in theoretically solvable settings remains difficult. We present an analysis of signSGD in a high dimensional limit, and derive a limiting SDE and ODE to describe the risk. Using this framework we quantify four effects of signSGD: effective learning rate, noise compression, diagonal preconditioning, and gradient noise reshaping. Our analysis is consistent with experimental observations but moves beyond that by quantifying the dependence of these effects on the data and noise distributions. We conclude with a conjecture on how these results might be extended to Adam. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12135v1-abstract-full').style.display = 'none'; document.getElementById('2411.12135v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09691">arXiv:2411.09691</a> <span> [<a href="https://arxiv.org/pdf/2411.09691">pdf</a>, <a href="https://arxiv.org/format/2411.09691">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Advancing Fine-Grained Visual Understanding with Multi-Scale Alignment in Multi-Modal Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+W">Wei Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhaowei Li</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Q">Qi Xu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+L">Linfeng Li</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+Y">YiQing Cai</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+B">Botian Jiang</a>, <a href="/search/cs?searchtype=author&query=Song%2C+H">Hang Song</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+X">Xingcan Hu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+P">Pengyu Wang</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+L">Li Xiao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.09691v1-abstract-short" style="display: inline;"> Multi-modal large language models (MLLMs) have achieved remarkable success in fine-grained visual understanding across a range of tasks. However, they often encounter significant challenges due to inadequate alignment for fine-grained knowledge, which restricts their ability to accurately capture local details and attain a comprehensive global perception. While recent advancements have focused on… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09691v1-abstract-full').style.display = 'inline'; document.getElementById('2411.09691v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.09691v1-abstract-full" style="display: none;"> Multi-modal large language models (MLLMs) have achieved remarkable success in fine-grained visual understanding across a range of tasks. However, they often encounter significant challenges due to inadequate alignment for fine-grained knowledge, which restricts their ability to accurately capture local details and attain a comprehensive global perception. While recent advancements have focused on aligning object expressions with grounding information, they typically lack explicit integration of object images, which contain affluent information beyond mere texts or coordinates. To bridge this gap, we introduce a novel fine-grained visual knowledge alignment method that effectively aligns and integrates multi-scale knowledge of objects, including texts, coordinates, and images. This innovative method is underpinned by our multi-scale fine-grained enhancement data synthesis pipeline, which provides over 300K essential training data to enhance alignment and improve overall performance. Furthermore, we present TinyGroundingGPT, a series of compact models optimized for high-level alignments. 
   With a scale of approximately 3B parameters, TinyGroundingGPT achieves outstanding results in grounding tasks while delivering performance comparable to larger MLLMs in complex visual scenarios.
   Submitted 14 November, 2024; originally announced November 2024.

6. arXiv:2411.07360 [pdf, other] cs.SE
   ChatGPT Inaccuracy Mitigation during Technical Report Understanding: Are We There Yet?
   Authors: Salma Begum Tamanna, Gias Uddin, Song Wang, Lan Xia, Longyu Zhang
   Abstract: Hallucinations, the tendency to produce irrelevant or incorrect responses, are a prevalent concern in generative AI-based tools like ChatGPT. Although hallucinations in ChatGPT have been studied for textual responses, it is unknown how ChatGPT hallucinates for technical texts that contain both textual and technical terms. We surveyed 47 software engineers and produced a benchmark of 412 Q&A pairs from the bug reports of two OSS projects. We find that a RAG-based ChatGPT (i.e., ChatGPT tuned with the benchmark issue reports) is 36.4% correct when producing answers to the questions, due to two reasons: 1) limitations in understanding complex technical content in code snippets like stack traces, and 2) limitations in integrating contexts denoted in the technical terms and texts.
   We present CHIME (ChatGPT Inaccuracy Mitigation Engine), whose underlying principle is that if we can preprocess the technical reports better and guide the query validation process in ChatGPT, we can address the observed limitations. CHIME uses a context-free grammar (CFG) to parse stack traces in technical reports. CHIME then verifies and fixes ChatGPT responses by applying metamorphic testing and query transformation. In our benchmark, CHIME shows 30.3% more correction over ChatGPT responses. In a user study, we find that the improved responses with CHIME are considered more useful than those generated from ChatGPT without CHIME.
   Submitted 11 November, 2024; originally announced November 2024.
   Journal ref: 47th IEEE/ACM International Conference on Software Engineering (ICSE 2025)
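
As a rough illustration of the stack-trace preprocessing step mentioned above, the snippet below extracts Java-style frames with a regular expression rather than the context-free grammar CHIME uses; the frame format and field names are assumptions for the sketch.

```python
import re

# Matches Java-style frames such as "at com.example.Foo.bar(Foo.java:42)".
FRAME = re.compile(r"at\s+(?P<cls>[\w.$]+)\.(?P<method>[\w<>$]+)"
                   r"\((?P<file>[\w.]+):(?P<line>\d+)\)")

def parse_stack_trace(text):
    # Return one dict per recognized frame, in order of appearance.
    return [m.groupdict() for m in FRAME.finditer(text)]

frames = parse_stack_trace(
    "Exception in thread \"main\" java.lang.NullPointerException\n"
    "    at com.example.Foo.bar(Foo.java:42)\n"
    "    at com.example.Main.main(Main.java:7)\n"
)
# frames[0] == {'cls': 'com.example.Foo', 'method': 'bar',
#               'file': 'Foo.java', 'line': '42'}
```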

7. arXiv:2411.01561 [pdf, other] cs.MM cs.IR
   Multimodal Graph Neural Network for Recommendation with Dynamic De-redundancy and Modality-Guided Feature De-noisy
   Authors: Feng Mo, Lin Xiao, Qiya Song, Xieping Gao, Eryao Liang
   Abstract: Graph neural networks (GNNs) have become crucial in multimodal recommendation tasks because of their powerful ability to capture complex relationships between neighboring nodes. However, increasing the number of propagation layers in GNNs can lead to feature redundancy, which may negatively impact the overall recommendation performance. In addition, existing recommendation methods directly map the preprocessed multimodal features to a low-dimensional space, which introduces noise unrelated to user preference and thus affects the representation ability of the model. To tackle the aforementioned challenges, we propose Multimodal Graph Neural Network for Recommendation (MGNM) with Dynamic De-redundancy and Modality-Guided Feature De-noisy, which is divided into local and global interaction. Initially, in the local interaction process, we integrate a dynamic de-redundancy (DDR) loss function, which uses the product of the feature coefficient matrix and the feature matrix as a penalization factor. It reduces the feature redundancy effects of multimodal and behavioral features caused by the stacking of multiple GNN layers. Subsequently, in the global interaction process, we develop modality-guided global feature purifiers for each modality to alleviate the impact of modality noise. This two-fold guiding mechanism eliminates modality features that are irrelevant to user preferences and captures complex relationships within the modality. Experimental results demonstrate that MGNM achieves superior performance on multimodal information denoising and removal of redundant information compared to the state-of-the-art methods.
   Submitted 3 November, 2024; originally announced November 2024.

8. arXiv:2410.22041 cs.HC
   An LLM-based Simulation Framework for Embodied Conversational Agents in Psychological Counseling
   Authors: Lixiu Wu, Yuanrong Tang, Qisen Pan, Xianyang Zhan, Yucheng Han, Mingyang You, Lanxi Xiao, Tianhong Wang, Chen Zhong, Jiangtao Gong
   Abstract: Simulation is crucial for validating algorithmic strategies in real-world scenarios. While LLM-based social simulation shows promise as a mainstream tool, simulating complex scenarios like psychological counseling remains challenging. We present ECAs (short for Embodied Conversational Agents), a framework for simulating psychological counseling clients' embodied memory, integrating embodied cognition and counseling theories. We formulate six design goals based on a comprehensive review of psychological counseling theories. Using LLMs, we expand real counseling case data into a nuanced embodied cognitive memory space and generate dialogues based on high-frequency counseling questions. We validate our framework using the D4 dataset, with evaluations by licensed counselors. Results show our approach significantly outperforms baselines in simulation authenticity and necessity. To demonstrate scalability, we created a public ECAs dataset through batch simulations. This research provides valuable insights for future social simulation studies in psychological counseling and Embodied Counseling Agents research.
   Submitted 30 October, 2024; v1 submitted 29 October, 2024; originally announced October 2024.
   Comments: After careful consideration, we have decided to withdraw this version because there are still several details that need to be adjusted to ensure the accuracy and completeness of our work.
   We do not have an alternative version in the short term and will resubmit it after the revision is completed.

9. arXiv:2410.20838 [pdf, other] cs.CL
   A Simple Yet Effective Corpus Construction Framework for Indonesian Grammatical Error Correction
   Authors: Nankai Lin, Meiyu Zeng, Wentao Huang, Shengyi Jiang, Lixian Xiao, Aimin Yang
   Abstract: Currently, the majority of research in grammatical error correction (GEC) is concentrated on universal languages, such as English and Chinese. Many low-resource languages lack accessible evaluation corpora. How to efficiently construct high-quality evaluation corpora for GEC in low-resource languages has become a significant challenge. To fill these gaps, in this paper, we present a framework for constructing GEC corpora. Specifically, we focus on Indonesian as our research language and construct an evaluation corpus for Indonesian GEC using the proposed framework, addressing the limitations of existing evaluation corpora in Indonesian. Furthermore, we investigate the feasibility of utilizing existing large language models (LLMs), such as GPT-3.5-Turbo and GPT-4, to streamline corpus annotation efforts in GEC tasks. The results demonstrate significant potential for enhancing the performance of LLMs in low-resource language settings. Our code and corpus can be obtained from https://github.com/GKLMIP/GEC-Construction-Framework.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20838v1-abstract-full').style.display = 'none'; document.getElementById('2410.20838v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.18418">arXiv:2410.18418</a> <span> [<a href="https://arxiv.org/pdf/2410.18418">pdf</a>, <a href="https://arxiv.org/format/2410.18418">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Knowledge-Assisted Privacy Preserving in Semantic Communication </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xuesong Liu</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Y">Yao Sun</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+R">Runze Cheng</a>, <a href="/search/cs?searchtype=author&query=Xia%2C+L">Le Xia</a>, <a href="/search/cs?searchtype=author&query=Abumarshoud%2C+H">Hanaa Abumarshoud</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Lei Zhang</a>, <a href="/search/cs?searchtype=author&query=Imran%2C+M+A">Muhammad Ali Imran</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.18418v2-abstract-short" style="display: inline;"> Semantic communication (SC) offers promising advancements in data transmission efficiency and reliability by focusing on delivering true meaning rather than solely binary bits of messages. However, privacy concerns in SC might become outstanding. Eavesdroppers equipped with advanced semantic coding models and extensive knowledge could be capable of correctly decoding and reasoning sensitive semant… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18418v2-abstract-full').style.display = 'inline'; document.getElementById('2410.18418v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.18418v2-abstract-full" style="display: none;"> Semantic communication (SC) offers promising advancements in data transmission efficiency and reliability by focusing on delivering true meaning rather than solely binary bits of messages. However, privacy concerns in SC might become outstanding. Eavesdroppers equipped with advanced semantic coding models and extensive knowledge could be capable of correctly decoding and reasoning sensitive semantics from just a few stolen bits. To this end, this article explores utilizing knowledge to enhance data privacy in SC networks. Specifically, we first identify the potential attacks in SC based on the analysis of knowledge. Then, we propose a knowledge-assisted privacy preserving SC framework, which consists of a data transmission layer for precisely encoding and decoding source messages, and a knowledge management layer responsible for injecting appropriate knowledge into the transmission pair. 
   Moreover, we elaborate on the transceiver design in the proposed SC framework to explain how knowledge should be utilized properly. Finally, some challenges of the proposed SC framework are discussed to expedite the practical implementation.
   Submitted 23 November, 2024; v1 submitted 24 October, 2024; originally announced October 2024.

11. arXiv:2410.17372 [pdf, other] cs.SE
   A Systematic Mapping Study on Architectural Approaches to Software Performance Analysis
   Authors: Yutong Zhao, Lu Xiao, Chenhao Wei, Rick Kazman, Ye Yang
   Abstract: Software architecture is the foundation of a system's ability to achieve various quality attributes, including software performance. However, there is a lack of comprehensive and in-depth understanding of why and how software architecture and performance analysis are integrated to guide related future research. To fill this gap, this paper presents a systematic mapping study of 109 papers that integrate software architecture and performance analysis. We focused on five research questions that provide guidance for researchers and practitioners to gain an in-depth understanding of this research area.
   These questions addressed: a systematic mapping of related studies based on the high-level research purposes and specific focuses (RQ1), the software development activities these studies intended to facilitate (RQ2), the typical study templates of different research purposes (RQ3), the available tools and instruments for automating the analysis (RQ4), and the evaluation methodology employed in the studies (RQ5). Through these research questions, we also identified critical research gaps and future directions, including: 1) the lack of available tools and benchmark datasets to support replication, cross-validation and comparison of studies; 2) the need for architecture and performance analysis techniques that handle the challenges in emerging software domains; 3) the lack of consideration of practical factors that impact the adoption of the architecture and performance analysis approaches; and finally 4) the need for the adoption of modern ML/AI techniques to efficiently integrate architecture and performance analysis.
   Submitted 22 October, 2024; originally announced October 2024.
   Comments: 27 pages, 4 figures

12. arXiv:2410.15919 [pdf, other] cs.CV
   Are Large-scale Soft Labels Necessary for Large-scale Dataset Distillation?
   Authors: Lingao Xiao, Yang He
   Abstract: In ImageNet-condensation, the storage for auxiliary soft labels exceeds that of the condensed dataset by over 30 times. However, are large-scale soft labels necessary for large-scale dataset distillation? In this paper, we first discover that the high within-class similarity in condensed datasets necessitates the use of large-scale soft labels. This high within-class similarity can be attributed to the fact that previous methods use samples from different classes to construct a single batch for batch normalization (BN) matching. To reduce the within-class similarity, we introduce class-wise supervision during the image synthesizing process by batching the samples within classes, instead of across classes. As a result, we can increase within-class diversity and reduce the size of required soft labels. A key benefit of improved image diversity is that soft label compression can be achieved through simple random pruning, eliminating the need for complex rule-based strategies. Experiments validate our discoveries. For example, when condensing ImageNet-1K to 200 images per class, our approach compresses the required soft labels from 113 GB to 2.8 GB (40x compression) with a 2.6% performance gain. Code is available at: https://github.com/he-y/soft-label-pruning-for-dataset-distillation
   Submitted 3 November, 2024; v1 submitted 21 October, 2024; originally announced October 2024.
   Comments: Accepted by NeurIPS 2024
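
The compression step described in the abstract above (once within-class diversity is improved, stored soft labels can be pruned by simple random selection) fits in a few lines; the tensor layout, keep ratio, and function name below are assumptions for illustration, not the released implementation.

```python
import torch

def random_prune_soft_labels(soft_labels, keep_ratio=0.1, seed=0):
    # soft_labels: assumed layout (num_stored_labels, num_classes) of teacher
    # outputs saved for a condensed dataset. Simple random pruning keeps a
    # fixed fraction, relying on improved image diversity (per the abstract)
    # rather than a rule-based selection scheme.
    g = torch.Generator().manual_seed(seed)
    n = soft_labels.shape[0]
    keep = torch.randperm(n, generator=g)[: max(1, int(n * keep_ratio))]
    return soft_labels[keep], keep

pruned, kept = random_prune_soft_labels(torch.randn(1000, 1000), keep_ratio=0.025)
# Keeps 25 of 1000 stored labels (40x fewer), mirroring the paper's
# 113 GB -> 2.8 GB figure only in spirit.
```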
Abstract: In ImageNet-condensation, the storage for auxiliary soft labels exceeds that of the condensed dataset by over 30 times. However, are large-scale soft labels necessary for large-scale dataset distillation? In this paper, we first discover that the high within-class similarity in condensed datasets necessitates the use of large-scale soft labels. This high within-class similarity can be attributed to the fact that previous methods use samples from different classes to construct a single batch for batch normalization (BN) matching. To reduce the within-class similarity, we introduce class-wise supervision during the image synthesizing process by batching the samples within classes, instead of across classes. As a result, we can increase within-class diversity and reduce the size of required soft labels. A key benefit of improved image diversity is that soft label compression can be achieved through simple random pruning, eliminating the need for complex rule-based strategies. Experiments validate our discoveries. For example, when condensing ImageNet-1K to 200 images per class, our approach compresses the required soft labels from 113 GB to 2.8 GB (40x compression) with a 2.6% performance gain. Code is available at: https://github.com/he-y/soft-label-pruning-for-dataset-distillation
Submitted 3 November, 2024; v1 submitted 21 October, 2024; originally announced October 2024.
Comments: Accepted by NeurIPS 2024
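The compression step described in this abstract amounts, at its core, to randomly keeping a small fraction of the stored soft-label vectors once within-class diversity is high enough. A minimal illustrative sketch follows; the array shape, keep ratio, and function name are assumptions for illustration, not the authors' released code.

import numpy as np

def randomly_prune_soft_labels(soft_labels: np.ndarray, keep_ratio: float, seed: int = 0):
    """Keep a random subset of stored soft-label vectors (simple random pruning)."""
    rng = np.random.default_rng(seed)
    n = soft_labels.shape[0]
    kept = np.sort(rng.choice(n, size=max(1, int(n * keep_ratio)), replace=False))
    return soft_labels[kept], kept

# Toy example: 100,000 stored label vectors over 1,000 classes, pruned to 2.5%.
labels = np.random.rand(100_000, 1_000).astype(np.float32)
pruned, kept_indices = randomly_prune_soft_labels(labels, keep_ratio=0.025)
print(pruned.shape)  # (2500, 1000)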
arXiv:2410.08021 (https://arxiv.org/abs/2410.08021) [pdf, other]
Subjects: cs.CV (Computer Vision and Pattern Recognition)
Title: OneRef: Unified One-tower Expression Grounding and Segmentation with Mask Referring Modeling
Authors: Linhui Xiao, Xiaoshan Yang, Fang Peng, Yaowei Wang, Changsheng Xu
Abstract: Constrained by the separate encoding of vision and language, existing grounding and referring segmentation works heavily rely on bulky Transformer-based fusion en-/decoders and a variety of early-stage interaction technologies. Simultaneously, the current mask visual language modeling (MVLM) fails to capture the nuanced referential relationship between image and text in referring tasks. In this paper, we propose OneRef, a minimalist referring framework built on the modality-shared one-tower transformer that unifies the visual and linguistic feature spaces. To model the referential relationship, we introduce a novel MVLM paradigm called Mask Referring Modeling (MRefM), which encompasses both referring-aware mask image modeling and referring-aware mask language modeling. Both modules reconstruct not only modality-related content but also cross-modal referring content. Within MRefM, we propose a referring-aware dynamic image masking strategy that is aware of the referred region rather than relying on fixed ratios or generic random masking schemes. By leveraging the unified visual language feature space and incorporating MRefM's ability to model the referential relations, our approach enables direct regression of the referring results without resorting to various complex techniques. Our method consistently surpasses existing approaches and achieves SoTA performance on both grounding and segmentation tasks, providing valuable insights for future research. Our code and models are available at https://github.com/linhuixiao/OneRef.
Submitted 25 October, 2024; v1 submitted 10 October, 2024; originally announced October 2024.
Comments: Accepted by NeurIPS 2024. The project page: https://github.com/linhuixiao/OneRef
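The referring-aware dynamic image masking mentioned above can be pictured as masking patches that overlap the referred region at a higher rate than background patches. The sketch below is a toy illustration under assumed inputs (a patch grid, a box in patch coordinates, and two masking rates); it is not the OneRef implementation.

import numpy as np

def referring_aware_mask(grid_size: int, referred_box, p_inside: float = 0.75,
                         p_outside: float = 0.25, seed: int = 0) -> np.ndarray:
    """Boolean patch mask: patches inside the referred box are masked more often."""
    rng = np.random.default_rng(seed)
    x0, y0, x1, y1 = referred_box  # assumed format: (x0, y0, x1, y1) in patch coordinates
    rows = np.arange(grid_size)[:, None]
    cols = np.arange(grid_size)[None, :]
    inside = (cols >= x0) & (cols <= x1) & (rows >= y0) & (rows <= y1)
    probs = np.where(inside, p_inside, p_outside)
    return rng.random((grid_size, grid_size)) < probs

mask = referring_aware_mask(14, (3, 3, 8, 8))
print(mask.shape, round(mask.mean(), 2))  # (14, 14) and a masking rate between 0.25 and 0.75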
</p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Min%2C+C">Chen Min</a>, <a href="/search/cs?searchtype=author&query=Si%2C+S">Shubin Si</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xu Wang</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+H">Hanzhang Xue</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+W">Weizhong Jiang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yang Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Juan Wang</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Q">Qingtian Zhu</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Q">Qi Zhu</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+L">Lun Luo</a>, <a href="/search/cs?searchtype=author&query=Kong%2C+F">Fanjie Kong</a>, <a href="/search/cs?searchtype=author&query=Miao%2C+J">Jinyu Miao</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+X">Xudong Cai</a>, <a href="/search/cs?searchtype=author&query=An%2C+S">Shuai An</a>, <a href="/search/cs?searchtype=author&query=Li%2C+W">Wei Li</a>, <a href="/search/cs?searchtype=author&query=Mei%2C+J">Jilin Mei</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+T">Tong Sun</a>, <a href="/search/cs?searchtype=author&query=Zhai%2C+H">Heng Zhai</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Q">Qifeng Liu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+F">Fangzhou Zhao</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+L">Liang Chen</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shuai Wang</a>, <a href="/search/cs?searchtype=author&query=Shang%2C+E">Erke Shang</a>, <a href="/search/cs?searchtype=author&query=Shang%2C+L">Linzhi Shang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+K">Kunlong Zhao</a> , et al. (13 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.07701v3-abstract-short" style="display: inline;"> Research on autonomous driving in unstructured outdoor environments is less advanced than in structured urban settings due to challenges like environmental diversities and scene complexity. These environments-such as rural areas and rugged terrains-pose unique obstacles that are not common in structured urban areas. Despite these difficulties, autonomous driving in unstructured outdoor environment… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.07701v3-abstract-full').style.display = 'inline'; document.getElementById('2410.07701v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.07701v3-abstract-full" style="display: none;"> Research on autonomous driving in unstructured outdoor environments is less advanced than in structured urban settings due to challenges like environmental diversities and scene complexity. These environments-such as rural areas and rugged terrains-pose unique obstacles that are not common in structured urban areas. Despite these difficulties, autonomous driving in unstructured outdoor environments is crucial for applications in agriculture, mining, and military operations. 
Abstract: Research on autonomous driving in unstructured outdoor environments is less advanced than in structured urban settings due to challenges like environmental diversities and scene complexity. These environments, such as rural areas and rugged terrains, pose unique obstacles that are not common in structured urban areas. Despite these difficulties, autonomous driving in unstructured outdoor environments is crucial for applications in agriculture, mining, and military operations. Our survey reviews over 250 papers on autonomous driving in unstructured outdoor environments, covering offline mapping, pose estimation, environmental perception, path planning, end-to-end autonomous driving, datasets, and relevant challenges. We also discuss emerging trends and future research directions. This review aims to consolidate knowledge and encourage further research for autonomous driving in unstructured environments. To support ongoing work, we maintain an active repository with up-to-date literature and open-source projects at: https://github.com/chaytonmin/Survey-Autonomous-Driving-in-Unstructured-Environments.
Submitted 31 October, 2024; v1 submitted 10 October, 2024; originally announced October 2024.
Comments: Survey paper; 38 pages

arXiv:2410.05779 (https://arxiv.org/abs/2410.05779) [pdf, other]
Subjects: cs.IR (Information Retrieval); cs.AI (Artificial Intelligence)
Title: LightRAG: Simple and Fast Retrieval-Augmented Generation
Authors: Zirui Guo, Lianghao Xia, Yanhua Yu, Tu Ao, Chao Huang
Abstract: Retrieval-Augmented Generation (RAG) systems enhance large language models (LLMs) by integrating external knowledge sources, enabling more accurate and contextually relevant responses tailored to user needs. However, existing RAG systems have significant limitations, including reliance on flat data representations and inadequate contextual awareness, which can lead to fragmented answers that fail to capture complex inter-dependencies. To address these challenges, we propose LightRAG, which incorporates graph structures into text indexing and retrieval processes. This framework employs a dual-level retrieval system that supports comprehensive information retrieval through both low-level and high-level knowledge discovery. Additionally, the integration of graph structures with vector representations facilitates efficient retrieval of related entities and their relationships, significantly improving response times while maintaining contextual relevance. This capability is further enhanced by an incremental update algorithm that ensures the timely integration of new data, allowing the system to remain effective and responsive in rapidly changing data environments. Extensive experimental validation demonstrates considerable improvements in retrieval accuracy and efficiency compared to existing approaches. We have made our LightRAG open-source and available at the link: https://github.com/HKUDS/LightRAG.
Submitted 7 November, 2024; v1 submitted 8 October, 2024; originally announced October 2024.
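The dual-level retrieval idea, combining entity-centric (low-level) lookups over a graph index with theme-centric (high-level) lookups, can be sketched with a toy in-memory index. All data structures, keys, and the one-hop expansion below are illustrative assumptions, not the LightRAG API.

# Toy dual-level retrieval over a hand-built graph index (illustrative assumptions only).
entity_index = {
    "solid-state battery": {"neighbors": ["electrolyte"], "chunks": ["chunk-03"]},
    "electrolyte": {"neighbors": ["solid-state battery"], "chunks": ["chunk-07"]},
}
theme_index = {
    "energy storage trends": ["chunk-03", "chunk-11"],
}

def dual_level_retrieve(low_level_keys, high_level_keys):
    chunks = set()
    # Low level: start from mentioned entities and include their one-hop graph neighbors.
    for key in low_level_keys:
        if key in entity_index:
            chunks.update(entity_index[key]["chunks"])
            for neighbor in entity_index[key]["neighbors"]:
                chunks.update(entity_index.get(neighbor, {}).get("chunks", []))
    # High level: pull chunks attached to broader themes.
    for theme in high_level_keys:
        chunks.update(theme_index.get(theme, []))
    return sorted(chunks)

print(dual_level_retrieve(["solid-state battery"], ["energy storage trends"]))
# ['chunk-03', 'chunk-07', 'chunk-11']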
arXiv:2410.04752 (https://arxiv.org/abs/2410.04752) [pdf, other]
Subjects: cs.CL (Computation and Language)
Title: Document-level Causal Relation Extraction with Knowledge-guided Binary Question Answering
Authors: Zimu Wang, Lei Xia, Wei Wang, Xinya Du
Abstract: As an essential task in information extraction (IE), Event-Event Causal Relation Extraction (ECRE) aims to identify and classify the causal relationships between event mentions in natural language texts. However, existing research on ECRE has highlighted two critical challenges, including the lack of document-level modeling and causal hallucinations. In this paper, we propose a Knowledge-guided binary Question Answering (KnowQA) method with event structures for ECRE, consisting of two stages: Event Structure Construction and Binary Question Answering. We conduct extensive experiments under both zero-shot and fine-tuning settings with large language models (LLMs) on the MECI and MAVEN-ERE datasets. Experimental results demonstrate the usefulness of event structures on document-level ECRE and the effectiveness of KnowQA by achieving state-of-the-art results on the MECI dataset. We observe not only the effectiveness but also the high generalizability and low inconsistency of our method, particularly with complete event structures after fine-tuning the models.
Submitted 7 October, 2024; originally announced October 2024.
Comments: Accepted at Findings of EMNLP 2024. Camera-ready version
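The second stage, binary question answering, can be pictured as asking one yes/no causal question per ordered pair of events, conditioned on the constructed event structures. The sketch below stubs out the language model with a trivial placeholder; the prompt wording and the answer() stub are assumptions, not the KnowQA prompts.

from itertools import permutations

def answer(prompt: str) -> str:
    """Placeholder for an LLM call; always answers 'no' in this offline sketch."""
    return "no"

def build_prompt(document: str, event_structures: dict, cause: str, effect: str) -> str:
    return (f"Document: {document}\nEvent structures: {event_structures}\n"
            f"Question: Does the event '{cause}' cause the event '{effect}'? Answer yes or no.")

def extract_causal_pairs(document: str, events: list, event_structures: dict) -> list:
    pairs = []
    for cause, effect in permutations(events, 2):  # one binary question per ordered event pair
        reply = answer(build_prompt(document, event_structures, cause, effect))
        if reply.strip().lower().startswith("yes"):
            pairs.append((cause, effect))
    return pairs

print(extract_causal_pairs("toy document", ["storm", "flooding"], {"storm": {"location": "coast"}}))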
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at Findings of EMNLP 2024. Camera-ready version</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.04179">arXiv:2410.04179</a> <span> [<a href="https://arxiv.org/pdf/2410.04179">pdf</a>, <a href="https://arxiv.org/format/2410.04179">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Theoretical Economics">econ.TH</span> </div> </div> <p class="title is-5 mathjax"> Computing Most Equitable Voting Rules </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xia%2C+L">Lirong Xia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.04179v1-abstract-short" style="display: inline;"> How to design fair and (computationally) efficient voting rules is a central challenge in Computational Social Choice. In this paper, we aim at designing efficient algorithms for computing most equitable rules for large classes of preferences and decisions, which optimally satisfy two fundamental fairness/equity axioms: anonymity (every voter being treated equally) and neutrality (every alternativ… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04179v1-abstract-full').style.display = 'inline'; document.getElementById('2410.04179v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.04179v1-abstract-full" style="display: none;"> How to design fair and (computationally) efficient voting rules is a central challenge in Computational Social Choice. In this paper, we aim at designing efficient algorithms for computing most equitable rules for large classes of preferences and decisions, which optimally satisfy two fundamental fairness/equity axioms: anonymity (every voter being treated equally) and neutrality (every alternative being treated equally). By revealing a natural connection to the graph isomorphism problem and leveraging recent breakthroughs by Babai [2019], we design quasipolynomial-time algorithms that compute most equitable rules with verifications, which also compute verifications about whether anonymity and neutrality are satisfied at the input profile. Further extending this approach, we propose the canonical-labeling tie-breaking, which runs in quasipolynomial-time and optimally breaks ties to preserve anonymity and neutrality. As for the complexity lower bound, we prove that even computing verifications for most equitable rules is GI-complete (i.e., as hard as the graph isomorphism problem), and sometimes GA-complete (i.e., as hard as the graph automorphism problem), for many commonly studied combinations of preferences and decisions. To the best of our knowledge, these are the first problems in computational social choice that are known to be complete in the class GI or GA. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04179v1-abstract-full').style.display = 'none'; document.getElementById('2410.04179v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.01249">arXiv:2410.01249</a> <span> [<a href="https://arxiv.org/pdf/2410.01249">pdf</a>, <a href="https://arxiv.org/format/2410.01249">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Dual Approximation Policy Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xiong%2C+Z">Zhihan Xiong</a>, <a href="/search/cs?searchtype=author&query=Fazel%2C+M">Maryam Fazel</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+L">Lin Xiao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.01249v1-abstract-short" style="display: inline;"> We propose Dual Approximation Policy Optimization (DAPO), a framework that incorporates general function approximation into policy mirror descent methods. In contrast to the popular approach of using the $L_2$-norm to measure function approximation errors, DAPO uses the dual Bregman divergence induced by the mirror map for policy projection. This duality framework has both theoretical and practica… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.01249v1-abstract-full').style.display = 'inline'; document.getElementById('2410.01249v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.01249v1-abstract-full" style="display: none;"> We propose Dual Approximation Policy Optimization (DAPO), a framework that incorporates general function approximation into policy mirror descent methods. In contrast to the popular approach of using the $L_2$-norm to measure function approximation errors, DAPO uses the dual Bregman divergence induced by the mirror map for policy projection. This duality framework has both theoretical and practical implications: not only does it achieve fast linear convergence with general function approximation, but it also includes several well-known practical methods as special cases, immediately providing strong convergence guarantees. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.01249v1-abstract-full').style.display = 'none'; document.getElementById('2410.01249v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">30 pages, 2 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.17946">arXiv:2409.17946</a> <span> [<a href="https://arxiv.org/pdf/2409.17946">pdf</a>, <a href="https://arxiv.org/format/2409.17946">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Weak-to-Strong Backdoor Attack for Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhao%2C+S">Shuai Zhao</a>, <a href="/search/cs?searchtype=author&query=Gan%2C+L">Leilei Gan</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+Z">Zhongliang Guo</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+X">Xiaobao Wu</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+L">Luwei Xiao</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+X">Xiaoyu Xu</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+C">Cong-Duy Nguyen</a>, <a href="/search/cs?searchtype=author&query=Tuan%2C+L+A">Luu Anh Tuan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.17946v3-abstract-short" style="display: inline;"> Despite being widely applied due to their exceptional capabilities, Large Language Models (LLMs) have been proven to be vulnerable to backdoor attacks. These attacks introduce targeted vulnerabilities into LLMs by poisoning training samples and full-parameter fine-tuning. However, this kind of backdoor attack is limited since they require significant computational resources, especially as the size… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.17946v3-abstract-full').style.display = 'inline'; document.getElementById('2409.17946v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.17946v3-abstract-full" style="display: none;"> Despite being widely applied due to their exceptional capabilities, Large Language Models (LLMs) have been proven to be vulnerable to backdoor attacks. These attacks introduce targeted vulnerabilities into LLMs by poisoning training samples and full-parameter fine-tuning. However, this kind of backdoor attack is limited since they require significant computational resources, especially as the size of LLMs increases. Besides, parameter-efficient fine-tuning (PEFT) offers an alternative but the restricted parameter updating may impede the alignment of triggers with target labels. In this study, we first verify that backdoor attacks with PEFT may encounter challenges in achieving feasible performance. To address these issues and improve the effectiveness of backdoor attacks with PEFT, we propose a novel backdoor attack algorithm from weak to strong based on feature alignment-enhanced knowledge distillation (W2SAttack). 
Abstract: Despite being widely applied due to their exceptional capabilities, Large Language Models (LLMs) have been proven to be vulnerable to backdoor attacks. These attacks introduce targeted vulnerabilities into LLMs by poisoning training samples and full-parameter fine-tuning. However, such backdoor attacks are limited since they require significant computational resources, especially as the size of LLMs increases. Besides, parameter-efficient fine-tuning (PEFT) offers an alternative, but the restricted parameter updating may impede the alignment of triggers with target labels. In this study, we first verify that backdoor attacks with PEFT may encounter challenges in achieving feasible performance. To address these issues and improve the effectiveness of backdoor attacks with PEFT, we propose a novel backdoor attack algorithm from weak to strong based on feature alignment-enhanced knowledge distillation (W2SAttack). Specifically, we poison small-scale language models through full-parameter fine-tuning to serve as the teacher model. The teacher model then covertly transfers the backdoor to the large-scale student model through feature alignment-enhanced knowledge distillation, which employs PEFT. Theoretical analysis reveals that W2SAttack has the potential to augment the effectiveness of backdoor attacks. We demonstrate the superior performance of W2SAttack on classification tasks across four language models, four backdoor attack algorithms, and two different architectures of teacher models. Experimental results indicate success rates close to 100% for backdoor attacks targeting PEFT.
Submitted 13 October, 2024; v1 submitted 26 September, 2024; originally announced September 2024.
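Feature-alignment-enhanced knowledge distillation, in generic form, combines a temperature-softened logit-matching term with a term that pulls (projected) student hidden features toward the teacher's. The PyTorch-style sketch below is that generic objective under assumed shapes and weights; it is not the W2SAttack code and omits everything related to trigger poisoning.

import torch
import torch.nn.functional as F

def distill_loss(student_logits, teacher_logits, student_feat, teacher_feat,
                 proj, temperature=2.0, alpha=0.5):
    """Generic KD objective: KL on softened logits plus MSE on aligned features.

    proj: linear layer mapping the student feature dim to the teacher feature dim (assumed).
    """
    t = temperature
    kd = F.kl_div(F.log_softmax(student_logits / t, dim=-1),
                  F.softmax(teacher_logits / t, dim=-1),
                  reduction="batchmean") * (t * t)
    feat = F.mse_loss(proj(student_feat), teacher_feat)
    return alpha * kd + (1 - alpha) * feat

# Toy shapes: batch of 4, 3 classes, student/teacher hidden sizes 8/16 (all assumed).
proj = torch.nn.Linear(8, 16)
loss = distill_loss(torch.randn(4, 3), torch.randn(4, 3),
                    torch.randn(4, 8), torch.randn(4, 16), proj)
print(loss.item())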
arXiv:2409.17512 (https://arxiv.org/abs/2409.17512) [pdf, other]
Subjects: cs.CV (Computer Vision and Pattern Recognition)
Title: SCOMatch: Alleviating Overtrusting in Open-set Semi-supervised Learning
Authors: Zerun Wang, Liuyu Xiang, Lang Huang, Jiafeng Mao, Ling Xiao, Toshihiko Yamasaki
Abstract: Open-set semi-supervised learning (OSSL) leverages practical open-set unlabeled data, comprising both in-distribution (ID) samples from seen classes and out-of-distribution (OOD) samples from unseen classes, for semi-supervised learning (SSL). Prior OSSL methods initially learned the decision boundary between ID and OOD with labeled ID data, subsequently employing self-training to refine this boundary. These methods, however, suffer from the tendency to overtrust the labeled ID data: the scarcity of labeled data causes a distribution bias between the labeled samples and the entire ID data, which misleads the decision boundary into overfitting. The subsequent self-training process, based on the overfitted result, fails to rectify this problem. In this paper, we address the overtrusting issue by treating OOD samples as an additional class, forming a new SSL process. Specifically, we propose SCOMatch, a novel OSSL method that 1) selects reliable OOD samples as new labeled data with an OOD memory queue and a corresponding update strategy and 2) integrates the new SSL process into the original task through our Simultaneous Close-set and Open-set self-training. SCOMatch refines the decision boundary of ID and OOD classes across the entire dataset, thereby leading to improved results. Extensive experimental results show that SCOMatch significantly outperforms the state-of-the-art methods on various benchmarks. The effectiveness is further verified through ablation studies and visualization.
Submitted 25 September, 2024; originally announced September 2024.
Comments: ECCV 2024 accepted
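The OOD memory queue in this abstract can be illustrated as a fixed-capacity buffer that keeps the unlabeled samples currently assigned the highest OOD confidence, to be reused as labeled examples of the extra OOD class. Capacity, scoring, and the update policy below are illustrative assumptions, not the SCOMatch implementation.

import heapq

class OODMemoryQueue:
    """Keep the top-k most confident OOD candidates seen so far."""

    def __init__(self, capacity: int):
        self.capacity = capacity
        self._heap = []  # min-heap of (ood_confidence, sample_id)

    def update(self, sample_id, ood_confidence: float):
        if len(self._heap) < self.capacity:
            heapq.heappush(self._heap, (ood_confidence, sample_id))
        elif ood_confidence > self._heap[0][0]:
            heapq.heapreplace(self._heap, (ood_confidence, sample_id))

    def as_labeled_ood(self):
        # Return the retained sample ids, most confident first.
        return [sample_id for _, sample_id in sorted(self._heap, reverse=True)]

queue = OODMemoryQueue(capacity=3)
for sid, conf in [("u1", 0.91), ("u2", 0.55), ("u3", 0.97), ("u4", 0.88), ("u5", 0.99)]:
    queue.update(sid, conf)
print(queue.as_labeled_ood())  # ['u5', 'u3', 'u1']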
arXiv:2409.16678 (https://arxiv.org/abs/2409.16678) [pdf, other]
Subjects: eess.IV (Image and Video Processing); cs.AI (Artificial Intelligence); cs.CV (Computer Vision and Pattern Recognition); cs.LG (Machine Learning)
Title: TSBP: Improving Object Detection in Histology Images via Test-time Self-guided Bounding-box Propagation
Authors: Tingting Yang, Liang Xiao, Yizhe Zhang
Abstract: A global threshold (e.g., 0.5) is often applied to determine which bounding boxes should be included in the final results for an object detection task. A higher threshold reduces false positives but may result in missing a significant portion of true positives. A lower threshold can increase detection recall but may also result in more false positives. Because of this, using a preset global threshold (e.g., 0.5) applied to all the bounding box candidates may lead to suboptimal solutions. In this paper, we propose a Test-time Self-guided Bounding-box Propagation (TSBP) method, leveraging Earth Mover's Distance (EMD) to enhance object detection in histology images. TSBP utilizes bounding boxes with high confidence to influence those with low confidence, leveraging visual similarities between them. This propagation mechanism enables bounding boxes to be selected in a controllable, explainable, and robust manner, which surpasses the effectiveness of using simple thresholds and uncertainty calibration methods. Importantly, TSBP does not necessitate additional labeled samples for model training or parameter estimation, unlike calibration methods. We conduct experiments on gland detection and cell detection tasks in histology images. The results show that our proposed TSBP significantly improves detection outcomes when working in conjunction with state-of-the-art deep learning-based detection networks. Compared to other methods such as uncertainty calibration, TSBP yields more robust and accurate object detection predictions while using no additional labeled samples. The code is available at https://github.com/jwhgdeu/TSBP.
Submitted 25 September, 2024; originally announced September 2024.
Comments: MICCAI 2024
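The propagation idea, letting high-confidence boxes vouch for visually similar low-confidence ones, can be approximated with a much simpler cosine-similarity rule in place of the Earth Mover's Distance matching that TSBP actually uses; the thresholds, feature vectors, and similarity choice below are all illustrative assumptions.

import numpy as np

def propagate_boxes(features, scores, high_thr=0.5, sim_thr=0.8):
    """Select boxes that are either confident or visually similar to a confident box.

    features: (n, d) appearance features per candidate box (assumed to be given).
    scores:   (n,) detector confidences.
    """
    feats = features / np.linalg.norm(features, axis=1, keepdims=True)
    sim = feats @ feats.T
    confident = scores >= high_thr
    selected = confident.copy()
    for i in np.where(~confident)[0]:
        if confident.any() and sim[i, confident].max() >= sim_thr:
            selected[i] = True
    return selected

feats = np.array([[1.0, 0.0], [0.9, 0.1], [0.0, 1.0]])
print(propagate_boxes(feats, np.array([0.9, 0.3, 0.2])))  # [ True  True False]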
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">MICCAI 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.15730">arXiv:2409.15730</a> <span> [<a href="https://arxiv.org/pdf/2409.15730">pdf</a>, <a href="https://arxiv.org/format/2409.15730">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Learning Multiple Probabilistic Decisions from Latent World Model in Autonomous Driving </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xiao%2C+L">Lingyu Xiao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jiang-Jiang Liu</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+S">Sen Yang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiaofan Li</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+X">Xiaoqing Ye</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+W">Wankou Yang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jingdong Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.15730v1-abstract-short" style="display: inline;"> The autoregressive world model exhibits robust generalization capabilities in vectorized scene understanding but encounters difficulties in deriving actions due to insufficient uncertainty modeling and self-delusion. In this paper, we explore the feasibility of deriving decisions from an autoregressive world model by addressing these challenges through the formulation of multiple probabilistic hyp… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.15730v1-abstract-full').style.display = 'inline'; document.getElementById('2409.15730v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.15730v1-abstract-full" style="display: none;"> The autoregressive world model exhibits robust generalization capabilities in vectorized scene understanding but encounters difficulties in deriving actions due to insufficient uncertainty modeling and self-delusion. In this paper, we explore the feasibility of deriving decisions from an autoregressive world model by addressing these challenges through the formulation of multiple probabilistic hypotheses. We propose LatentDriver, a framework models the environment's next states and the ego vehicle's possible actions as a mixture distribution, from which a deterministic control signal is then derived. By incorporating mixture modeling, the stochastic nature of decisionmaking is captured. Additionally, the self-delusion problem is mitigated by providing intermediate actions sampled from a distribution to the world model. Experimental results on the recently released close-loop benchmark Waymax demonstrate that LatentDriver surpasses state-of-the-art reinforcement learning and imitation learning methods, achieving expert-level performance. The code and models will be made available at https://github.com/Sephirex-X/LatentDriver. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.15730v1-abstract-full').style.display = 'none'; document.getElementById('2409.15730v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.15156">arXiv:2409.15156</a> <span> [<a href="https://arxiv.org/pdf/2409.15156">pdf</a>, <a href="https://arxiv.org/format/2409.15156">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Rethinking Conventional Wisdom in Machine Learning: From Generalization to Scaling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xiao%2C+L">Lechao Xiao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.15156v1-abstract-short" style="display: inline;"> The remarkable success of large language pretraining and the discovery of scaling laws signify a paradigm shift in machine learning. Notably, the primary objective has evolved from minimizing generalization error to reducing approximation error, and the most effective strategy has transitioned from regularization (in a broad sense) to scaling up models. This raises a critical question: Do the es… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.15156v1-abstract-full').style.display = 'inline'; document.getElementById('2409.15156v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.15156v1-abstract-full" style="display: none;"> The remarkable success of large language pretraining and the discovery of scaling laws signify a paradigm shift in machine learning. Notably, the primary objective has evolved from minimizing generalization error to reducing approximation error, and the most effective strategy has transitioned from regularization (in a broad sense) to scaling up models. This raises a critical question: Do the established principles that proved successful in the generalization-centric era remain valid in this new era of scaling? This paper examines several influential regularization-based principles that may no longer hold true in the scaling-centric, large language model (LLM) era. These principles include explicit L2 regularization and implicit regularization through small batch sizes and large learning rates. Additionally, we identify a new phenomenon termed ``scaling law crossover,'' where two scaling curves intersect at a certain scale, implying that methods effective at smaller scales may not generalize to larger ones. Together, these observations highlight two fundamental questions within this new paradigm: $\bullet$ Guiding Principles for Scaling: If regularization is no longer the primary guiding principle for model design, what new principles are emerging to guide scaling? 
Abstract: The remarkable success of large language pretraining and the discovery of scaling laws signify a paradigm shift in machine learning. Notably, the primary objective has evolved from minimizing generalization error to reducing approximation error, and the most effective strategy has transitioned from regularization (in a broad sense) to scaling up models. This raises a critical question: Do the established principles that proved successful in the generalization-centric era remain valid in this new era of scaling? This paper examines several influential regularization-based principles that may no longer hold true in the scaling-centric, large language model (LLM) era. These principles include explicit L2 regularization and implicit regularization through small batch sizes and large learning rates. Additionally, we identify a new phenomenon termed "scaling law crossover," where two scaling curves intersect at a certain scale, implying that methods effective at smaller scales may not generalize to larger ones. Together, these observations highlight two fundamental questions within this new paradigm:
$\bullet$ Guiding Principles for Scaling: If regularization is no longer the primary guiding principle for model design, what new principles are emerging to guide scaling?
$\bullet$ Model Comparison at Scale: How to reliably and effectively compare models at the scale where only a single experiment is feasible?
Submitted 23 September, 2024; originally announced September 2024.
Comments: 19 pages
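The "scaling law crossover" phenomenon is easy to make concrete: model each method's loss as a power law in scale and solve for the point where the two fitted curves intersect. The coefficients below are made up purely to show the algebra.

import numpy as np

# Assume loss(N) ~ a * N**(-b); two methods with made-up coefficients.
a1, b1 = 10.0, 0.30   # better at small scale, flatter slope
a2, b2 = 20.0, 0.40   # worse at small scale, steeper slope

# Crossover where a1 * N**-b1 == a2 * N**-b2, i.e. N = (a2 / a1) ** (1 / (b2 - b1)).
n_cross = (a2 / a1) ** (1.0 / (b2 - b1))
print(f"curves cross at N = {n_cross:.3e}")

for n in np.logspace(0, 12, 7):
    l1, l2 = a1 * n ** -b1, a2 * n ** -b2
    print(f"N={n:.0e}  method1={l1:.4f}  method2={l2:.4f}  better={'1' if l1 < l2 else '2'}")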
arXiv:2409.13221 (https://arxiv.org/abs/2409.13221) [pdf, other]
Subjects: cs.LG (Machine Learning); cs.CL (Computation and Language); cs.DC (Distributed, Parallel, and Cluster Computing)
Title: RLHFuse: Efficient RLHF Training for Large Language Models with Inter- and Intra-Stage Fusion
Authors: Yinmin Zhong, Zili Zhang, Bingyang Wu, Shengyu Liu, Yukun Chen, Changyi Wan, Hanpeng Hu, Lei Xia, Ranchen Ming, Yibo Zhu, Xin Jin
Abstract: Reinforcement Learning from Human Feedback (RLHF) enhances the alignment between LLMs and human preference. The workflow of RLHF typically involves several models and tasks in a series of distinct stages. Existing RLHF training systems view each task as the smallest execution unit, thus overlooking the opportunities for subtask-level optimizations. Due to the intrinsic nature of RLHF training, i.e., the data skewness in the generation stage and the pipeline bubbles in the training stage, existing RLHF systems suffer from low GPU utilization in production deployments. RLHFuse breaks the traditional view of the RLHF workflow as a composition of individual tasks, splitting each task into finer-grained subtasks and performing stage fusion to improve GPU utilization. RLHFuse contains two key ideas. First, for generation and inference tasks, RLHFuse splits them into sample-level subtasks, enabling efficient inter-stage fusion to mitigate the original generation bottleneck dominated by long-tailed samples. Second, for training tasks, RLHFuse breaks them into subtasks of micro-batches. By leveraging the intuition that pipeline execution can be essentially complemented by another pipeline, RLHFuse performs intra-stage fusion to concurrently execute these subtasks in the training stage with a fused pipeline schedule, resulting in fewer pipeline bubbles. In addition, RLHFuse incorporates a series of system optimizations tailored for each stage of RLHF, making it efficient and scalable for our internal product usage. We evaluate RLHFuse on various popular LLMs and the results show that RLHFuse increases the training throughput by up to 3.7x, compared to existing state-of-the-art systems.
Submitted 25 September, 2024; v1 submitted 20 September, 2024; originally announced September 2024.
arXiv:2409.10077 (https://arxiv.org/abs/2409.10077) [pdf]
Subjects: cs.CL (Computation and Language); cs.AI (Artificial Intelligence)
Title: LLM-DER: A Named Entity Recognition Method Based on Large Language Models for Chinese Coal Chemical Domain
Authors: Le Xiao, Yunfei Xu, Jing Zhao
Abstract: Domain-specific Named Entity Recognition (NER), whose goal is to recognize domain-specific entities and their categories, provides an important support for constructing domain knowledge graphs. Currently, deep learning-based methods are widely used and effective in NER tasks, but they rely on large-scale labeled data; as a result, the scarcity of labeled data in a specific domain limits their application. Therefore, many studies have introduced few-shot methods and achieved some results. However, the entity structures in specific domains are often complex, and current few-shot methods struggle to adapt to NER tasks with complex features. Taking the Chinese coal chemical industry domain as an example, there exists a complex structure of multiple entities sharing a single entity, as well as multiple relationships for the same pair of entities, which affects the NER task under low-sample conditions. In this paper, we propose a Large Language Models (LLMs)-based entity recognition framework, LLM-DER, for the domain-specific entity recognition problem in Chinese. LLM-DER enriches entity information by generating a list of relationships containing entity types through LLMs and designs a plausibility and consistency evaluation method to remove misrecognized entities, which can effectively solve the complex structural entity recognition problem in a specific domain. The experimental results on the Resume dataset and the self-constructed coal chemical dataset Coal show that LLM-DER performs outstandingly in domain-specific entity recognition, not only outperforming the existing GPT-3.5-turbo baseline but also exceeding the fully-supervised baseline, verifying its effectiveness in entity recognition.
Submitted 16 September, 2024; originally announced September 2024.
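The consistency part of the plausibility-and-consistency evaluation can be approximated with a simple vote over repeated extraction runs: keep only (entity, type) pairs that are recognized consistently. The sketch below works on pre-computed candidate lists, so it needs no LLM call; the threshold, data format, and placeholder entities are assumptions, not LLM-DER's actual evaluation method.

from collections import Counter

def consistency_filter(candidate_runs, min_support=2):
    """Keep (entity, type) pairs recognized in at least min_support extraction runs."""
    votes = Counter(pair for run in candidate_runs for pair in set(run))
    return sorted(pair for pair, count in votes.items() if count >= min_support)

# Three hypothetical extraction runs over the same document (placeholder entities).
runs = [
    [("gasifier", "EQUIPMENT"), ("Lurgi furnace", "EQUIPMENT"), ("synthetic ammonia", "PRODUCT")],
    [("gasifier", "EQUIPMENT"), ("synthetic ammonia", "PRODUCT")],
    [("gasifier", "EQUIPMENT"), ("gasification agent", "MATERIAL")],
]
print(consistency_filter(runs, min_support=2))
# [('gasifier', 'EQUIPMENT'), ('synthetic ammonia', 'PRODUCT')]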
arXiv:2409.09661 (https://arxiv.org/abs/2409.09661) [pdf, other]
Subjects: cs.SE (Software Engineering); cs.CR (Cryptography and Security)
DOI: 10.1145/3691620.3695349 (https://doi.org/10.1145/3691620.3695349)
Title: ContractTinker: LLM-Empowered Vulnerability Repair for Real-World Smart Contracts
Authors: Che Wang, Jiashuo Zhang, Jianbo Gao, Libin Xia, Zhi Guan, Zhong Chen
The experimental results show that among the patches generated by ContractTinker, 23 (48%) are valid patches that fix the vulnerabilities, while 10 (21%) require only minor modifications. A video of ContractTinker is available at https://youtu.be/HWFVi-YHcPE. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.09661v1-abstract-full').style.display = 'none'; document.getElementById('2409.09661v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">4 pages, and to be accepted in ASE2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.07033">arXiv:2409.07033</a> <span> [<a href="https://arxiv.org/pdf/2409.07033">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.53469/jtpes.2024.04(03).20">10.53469/jtpes.2024.04(03).20 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> E-commerce Webpage Recommendation Scheme Base on Semantic Mining and Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhao%2C+W">Wenchao Zhao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xiaoyi Liu</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+R">Ruilin Xu</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+L">Lingxi Xiao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+M">Muqing Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.07033v1-abstract-short" style="display: inline;"> In e-commerce websites, web mining web page recommendation technology has been widely used. However, recommendation solutions often cannot meet the actual application needs of online shopping users. To address this problem, this paper proposes an e-commerce web page recommendation solution that combines semantic web mining and BP neural networks. First, the web logs of user searches are processed,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.07033v1-abstract-full').style.display = 'inline'; document.getElementById('2409.07033v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.07033v1-abstract-full" style="display: none;"> In e-commerce websites, web mining web page recommendation technology has been widely used. However, recommendation solutions often cannot meet the actual application needs of online shopping users. 
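The abstract describes the approach only at a high level (Chain-of-Thought decomposition plus static-analysis guidance); the sketch below shows one plausible shape for such a pipeline, with `ask_llm`, the sub-task list, and the analysis facts all being illustrative placeholders rather than ContractTinker's actual interfaces.

```python
# Illustrative pipeline shape only: decompose "repair this vulnerability" into
# sub-tasks (Chain-of-Thought style) and feed static-analysis facts into each
# prompt to ground the model. `ask_llm` is a placeholder, not a real API.
from typing import Callable, Dict

SUBTASKS = [
    "Summarize the root cause of the vulnerability.",
    "Locate the statements that must change.",
    "Propose a minimal patch in Solidity.",
    "Check the patch against the root-cause summary.",
]

def repair(contract_src: str,
           vuln_report: str,
           analysis_facts: Dict[str, str],
           ask_llm: Callable[[str], str]) -> str:
    context = f"Contract:\n{contract_src}\n\nAudit finding:\n{vuln_report}\n"
    context += "Static analysis facts:\n" + "\n".join(
        f"- {k}: {v}" for k, v in analysis_facts.items())
    answer = ""
    for step in SUBTASKS:                  # each sub-task sees prior answers
        prompt = f"{context}\n\nPrevious reasoning:\n{answer}\n\nTask: {step}"
        answer += f"\n[{step}]\n" + ask_llm(prompt)
    return answer                          # the final sub-task yields the checked patch

if __name__ == "__main__":
    echo = lambda p: "(model output for: " + p.splitlines()[-1] + ")"
    print(repair("contract C { ... }", "reentrancy in withdraw()",
                 {"external_calls": "withdraw() -> call.value"}, echo))
```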
arXiv:2409.07033 [cs.IR, cs.AI] https://arxiv.org/abs/2409.07033 doi:10.53469/jtpes.2024.04(03).20
E-commerce Webpage Recommendation Scheme Base on Semantic Mining and Neural Networks
Authors: Wenchao Zhao, Xiaoyi Liu, Ruilin Xu, Lingxi Xiao, Muqing Li
Abstract: In e-commerce websites, web-mining-based web page recommendation technology has been widely used. However, recommendation solutions often cannot meet the actual application needs of online shopping users. To address this problem, this paper proposes an e-commerce web page recommendation solution that combines semantic web mining and BP neural networks. First, the web logs of user searches are processed, and five features are extracted: content priority, time consumption priority, online shopping users' explicit/implicit feedback on the website, recommendation semantics, and input deviation amount. Then, these features are used as inputs to the BP neural network to classify and identify the priority of the final output web page. Finally, the web pages are sorted according to priority and recommended to users. This project uses book sales webpages as samples for experiments. The results show that this solution can quickly and accurately identify the webpages required by users.
Submitted 11 September, 2024; originally announced September 2024.
Comments: arXiv admin note: text overlap with arXiv:2409.01137
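As a minimal sketch of the described setup (five hand-crafted features fed to a back-propagation network that scores page priority, with pages then sorted by that score), the following toy example trains a small two-layer network with plain gradient descent on random stand-in data; the feature values, network size, and labels are all made up.

```python
# Minimal sketch (not the paper's code): a small back-propagation network that
# maps five per-page features to a priority score, then sorts pages by the
# predicted priority. The data here is random stand-in data.
import numpy as np

rng = np.random.default_rng(0)
X = rng.random((200, 5))                  # five features per page (see abstract)
y = (X @ np.array([0.4, 0.2, 0.2, 0.1, 0.1]) > 0.5).astype(float)  # toy labels

W1, b1 = rng.normal(0, 0.5, (5, 8)), np.zeros(8)
W2, b2 = rng.normal(0, 0.5, (8, 1)), np.zeros(1)
sigmoid = lambda z: 1.0 / (1.0 + np.exp(-z))

lr = 0.5
for _ in range(2000):                     # plain gradient descent with backprop
    h = sigmoid(X @ W1 + b1)
    p = sigmoid(h @ W2 + b2).ravel()
    grad_out = (p - y)[:, None] / len(y)          # d(log-loss)/d(logit)
    grad_W2 = h.T @ grad_out
    grad_h = grad_out @ W2.T * h * (1 - h)
    grad_W1 = X.T @ grad_h
    W2 -= lr * grad_W2; b2 -= lr * grad_out.sum(0)
    W1 -= lr * grad_W1; b1 -= lr * grad_h.sum(0)

priority = sigmoid(sigmoid(X @ W1 + b1) @ W2 + b2).ravel()
ranking = np.argsort(-priority)           # recommend highest-priority pages first
print("top pages:", ranking[:5], "accuracy:", ((priority > 0.5) == y).mean())
```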
arXiv:2409.06748 [cs.LG, cs.AI] https://arxiv.org/abs/2409.06748
EasyST: A Simple Framework for Spatio-Temporal Prediction
Authors: Jiabin Tang, Wei Wei, Lianghao Xia, Chao Huang
Abstract: Spatio-temporal prediction is a crucial research area in data-driven urban computing, with implications for transportation, public safety, and environmental monitoring. However, scalability and generalization challenges remain significant obstacles. Advanced models often rely on Graph Neural Networks (GNNs) to encode spatial and temporal correlations, but they struggle with the increased complexity of large-scale datasets. The recursive GNN-based message passing schemes used in these models hinder their training and deployment in real-life urban sensing scenarios. Moreover, long-spanning, large-scale spatio-temporal data introduce distribution shifts, necessitating improved generalization performance. To address these challenges, we propose EasyST, a simple framework for spatio-temporal prediction that learns lightweight and robust Multi-Layer Perceptrons (MLPs) by effectively distilling knowledge from complex spatio-temporal GNNs. We ensure robust knowledge distillation by integrating the spatio-temporal information bottleneck with a teacher-bounded regression loss, filtering out task-irrelevant noise and avoiding erroneous guidance. We further enhance the generalization ability of the student model by incorporating spatial and temporal prompts to provide downstream task contexts. Evaluation on three spatio-temporal datasets for urban computing tasks demonstrates that EasyST surpasses state-of-the-art approaches in terms of efficiency and accuracy. The implementation code is available at https://github.com/HKUDS/EasyST.
Submitted 10 September, 2024; originally announced September 2024.
Comments: Accepted by CIKM 2024, full paper
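The abstract names a teacher-bounded regression loss for the distillation step; the exact formulation is in the linked repository, so the PyTorch snippet below only sketches one common "bounded" variant, in which the student imitates the spatio-temporal GNN teacher only on samples where the teacher is at least as accurate as the student.

```python
# Sketch (assumed form, not the authors' exact loss): distill a spatio-temporal
# GNN teacher into an MLP student, but only trust the teacher on samples where
# its error does not exceed the student's error against the ground truth.
import torch
import torch.nn.functional as F

def teacher_bounded_regression_loss(student_pred, teacher_pred, target):
    """Supervised term plus a distillation term gated by teacher quality."""
    hard = F.mse_loss(student_pred, target)                  # supervised regression
    teacher_err = (teacher_pred - target).pow(2).mean(dim=-1)
    student_err = (student_pred - target).pow(2).mean(dim=-1)
    gate = (teacher_err <= student_err).float()              # trust teacher only when it is better
    soft = (gate * (student_pred - teacher_pred).pow(2).mean(dim=-1)).mean()
    return hard + soft

if __name__ == "__main__":
    student = torch.nn.Sequential(torch.nn.Linear(16, 64), torch.nn.ReLU(),
                                  torch.nn.Linear(64, 1))    # lightweight MLP student
    x = torch.randn(32, 16)
    target = torch.randn(32, 1)
    with torch.no_grad():
        teacher_pred = target + 0.1 * torch.randn(32, 1)     # stand-in for a GNN teacher
    loss = teacher_bounded_regression_loss(student(x), teacher_pred, target)
    loss.backward()
    print(float(loss))
```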
arXiv:2409.00097 [cs.CL, cs.AI] https://arxiv.org/abs/2409.00097
Large Language Models for Disease Diagnosis: A Scoping Review
Authors: Shuang Zhou, Zidu Xu, Mian Zhang, Chunpu Xu, Yawen Guo, Zaifu Zhan, Sirui Ding, Jiashuo Wang, Kaishuai Xu, Yi Fang, Liqiao Xia, Jeremy Yeung, Daochen Zha, Genevieve B. Melton, Mingquan Lin, Rui Zhang
Abstract: Automatic disease diagnosis has become increasingly valuable in clinical practice. The advent of large language models (LLMs) has catalyzed a paradigm shift in artificial intelligence, with growing evidence supporting the efficacy of LLMs in diagnostic tasks. Despite the increasing attention in this field, a holistic view is still lacking. Many critical aspects remain unclear, such as the diseases and clinical data to which LLMs have been applied, the LLM techniques employed, and the evaluation methods used. In this article, we perform a comprehensive review of LLM-based methods for disease diagnosis. Our review examines the existing literature across various dimensions, including disease types and associated clinical specialties, clinical data, LLM techniques, and evaluation methods. Additionally, we offer recommendations for applying and evaluating LLMs for diagnostic tasks. Furthermore, we assess the limitations of current research and discuss future directions. To our knowledge, this is the first comprehensive review of LLM-based disease diagnosis.
Submitted 19 September, 2024; v1 submitted 26 August, 2024; originally announced September 2024.
Comments: 69 pages

arXiv:2408.16375 [cs.RO] https://arxiv.org/abs/2408.16375
EasyChauffeur: A Baseline Advancing Simplicity and Efficiency on Waymax
Authors: Lingyu Xiao, Jiang-Jiang Liu, Xiaoqing Ye, Wankou Yang, Jingdong Wang
Abstract: Recent advancements in deep-learning-based driving planners have primarily focused on elaborate network engineering, yielding limited improvements. This paper diverges from conventional approaches by exploring three fundamental yet underinvestigated aspects: training policy, data efficiency, and evaluation robustness. We introduce EasyChauffeur, a reproducible and effective planner for both imitation learning (IL) and reinforcement learning (RL) on Waymax, a GPU-accelerated simulator. Notably, our findings indicate that the incorporation of on-policy RL significantly boosts performance and data efficiency. To further enhance this efficiency, we propose SNE-Sampling, a novel method that selectively samples data from the encoder's latent space, substantially improving EasyChauffeur's performance with RL. Additionally, we identify a deficiency in current evaluation methods, which fail to accurately assess the robustness of different planners due to significant performance drops from minor changes in the ego vehicle's initial state. In response, we propose Ego-Shifting, a new evaluation setting for assessing planners' robustness. Our findings advocate for a shift from a primary focus on network architectures to adopting a holistic approach encompassing training strategies, data efficiency, and robust evaluation methods.
Submitted 29 August, 2024; originally announced August 2024.
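Ego-Shifting is described above only as an evaluation setting that perturbs the ego vehicle's initial state; the sketch below illustrates that evaluation loop against a generic `rollout` callable, with the noise magnitudes and state fields chosen arbitrarily rather than taken from Waymax.

```python
# Rough sketch of the evaluation idea behind "Ego-Shifting" as described in the
# abstract: perturb the ego vehicle's initial pose slightly and measure how much
# a planner's score degrades. `rollout` is a placeholder for a simulator run.
import random
from statistics import mean
from typing import Callable, Dict

def ego_shifting_eval(rollout: Callable[[Dict], float],
                      base_state: Dict,
                      n_shifts: int = 20,
                      pos_noise: float = 0.5,     # metres (illustrative)
                      yaw_noise: float = 0.05):   # radians (illustrative)
    base_score = rollout(base_state)
    shifted = []
    for _ in range(n_shifts):
        s = dict(base_state)
        s["x"] = base_state["x"] + random.uniform(-pos_noise, pos_noise)
        s["y"] = base_state["y"] + random.uniform(-pos_noise, pos_noise)
        s["yaw"] = base_state["yaw"] + random.uniform(-yaw_noise, yaw_noise)
        shifted.append(rollout(s))
    return {"base": base_score, "shifted_mean": mean(shifted),
            "robustness_gap": base_score - mean(shifted)}

if __name__ == "__main__":
    fake_rollout = lambda s: 1.0 - 0.3 * abs(s["x"]) - 0.3 * abs(s["yaw"])
    print(ego_shifting_eval(fake_rollout, {"x": 0.0, "y": 0.0, "yaw": 0.0}))
```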
arXiv:2408.14735 [cs.MM, cs.CR, cs.DC] https://arxiv.org/abs/2408.14735
PPVF: An Efficient Privacy-Preserving Online Video Fetching Framework with Correlated Differential Privacy
Authors: Xianzhi Zhang, Yipeng Zhou, Di Wu, Quan Z. Sheng, Miao Hu, Linchang Xiao
Abstract: Online video streaming has evolved into an integral component of the contemporary Internet landscape. Yet, the disclosure of user requests presents formidable privacy challenges. As users stream their preferred online videos, their requests are automatically seized by video content providers, potentially leaking users' privacy. Unfortunately, current protection methods are not well-suited to preserving user request privacy from content providers while maintaining high-quality online video services. To tackle this challenge, we introduce a novel Privacy-Preserving Video Fetching (PPVF) framework, which utilizes trusted edge devices to pre-fetch and cache videos, ensuring the privacy of users' requests while optimizing the efficiency of edge caching. More specifically, we design PPVF with three core components: (1) an online privacy budget scheduler, which employs a theoretically guaranteed online algorithm to select non-requested videos as candidates with assigned privacy budgets, accounting for both video utilities and the available privacy budget; (2) a noisy video request generator, which generates redundant video requests (in addition to the original ones) using correlated differential privacy to obfuscate request privacy; and (3) an online video utility predictor, which leverages federated learning to collaboratively evaluate video utility in an online fashion, aiding video selection in (1) and noise generation in (2). Finally, we conduct extensive experiments using real-world video request traces from Tencent Video. The results demonstrate that PPVF effectively safeguards user request privacy while upholding high video caching performance.
Submitted 26 August, 2024; originally announced August 2024.
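The noisy video request generator is specified above only in outline; as a much-simplified illustration (not the paper's correlated-differential-privacy mechanism), the sketch below issues redundant decoy requests sampled with exponential-mechanism-style weights from utility-scored candidates under a per-round privacy budget.

```python
# Very simplified sketch of the "noisy video request generator" idea: alongside
# the real request, issue redundant requests drawn from candidate videos with
# exponential-mechanism-style weights exp(eps * utility / 2). This only shows
# the shape of budgeted, utility-aware noise; the paper's correlated-DP
# mechanism is more involved.
import math
import random

def noisy_requests(real_video: str,
                   candidates: dict,      # video_id -> predicted utility in [0, 1]
                   eps: float,            # privacy budget assigned this round
                   n_redundant: int = 2):
    ids = [v for v in candidates if v != real_video]
    weights = [math.exp(eps * candidates[v] / 2.0) for v in ids]
    redundant = random.choices(ids, weights=weights, k=n_redundant)
    batch = [real_video] + redundant
    random.shuffle(batch)                  # provider sees real and decoy requests mixed
    return batch

if __name__ == "__main__":
    utilities = {"v1": 0.9, "v2": 0.7, "v3": 0.2, "v4": 0.1}
    print(noisy_requests("v1", utilities, eps=1.0))
```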
arXiv:2408.10700 [cs.LG, cs.AI] https://arxiv.org/abs/2408.10700
AnyGraph: Graph Foundation Model in the Wild
Authors: Lianghao Xia, Chao Huang
Abstract: The growing ubiquity of relational data structured as graphs has underscored the need for graph learning models with exceptional generalization capabilities. However, current approaches often struggle to effectively extract generalizable insights, frequently requiring extensive fine-tuning and limiting their versatility. Graph foundation models offer a transformative solution, with the potential to learn robust, generalizable representations from graph data, enabling more effective and adaptable applications across a wide spectrum of tasks and domains. In this work, we investigate AnyGraph, a unified graph model designed to handle four key challenges: i) structure heterogeneity, addressing distribution shift in graph structural information; ii) feature heterogeneity, handling diverse feature representation spaces across graph datasets; iii) fast adaptation, efficiently adapting the model to new graph domains; and iv) scaling law emergence, enabling the model to exhibit scaling law behavior, where its performance scales favorably with the amount of data and parameter sizes. To tackle these critical challenges, we build AnyGraph upon a Graph Mixture-of-Experts (MoE) architecture. This approach empowers the model to effectively manage both in-domain and cross-domain distribution shift concerning structure-level and feature-level heterogeneity. Furthermore, a lightweight graph expert routing mechanism is proposed to facilitate AnyGraph's fast adaptability to new data and domains. Our extensive experiments on 38 diverse graph datasets demonstrate the strong zero-shot learning performance of AnyGraph across diverse graph domains with significant distribution shift. Furthermore, we validate the model's fast adaptation ability and scaling law emergence, showcasing its versatility.
Submitted 20 August, 2024; originally announced August 2024.
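As a generic sketch of the Graph Mixture-of-Experts idea with a lightweight router, in the spirit of the architecture described above but not the AnyGraph code, the PyTorch layer below routes each node's propagated embedding through a softmax-weighted mixture of linear experts.

```python
# Generic sketch of a graph Mixture-of-Experts layer with a lightweight router,
# illustrating the architectural idea named in the abstract (not AnyGraph itself).
import torch
import torch.nn as nn
import torch.nn.functional as F

class GraphMoE(nn.Module):
    def __init__(self, dim: int, n_experts: int = 4):
        super().__init__()
        self.experts = nn.ModuleList([nn.Linear(dim, dim) for _ in range(n_experts)])
        self.router = nn.Linear(dim, n_experts)    # lightweight routing scores

    def forward(self, x: torch.Tensor, adj: torch.Tensor) -> torch.Tensor:
        # x: [num_nodes, dim]; adj: [num_nodes, num_nodes] normalized adjacency.
        h = adj @ x                                # one simple propagation step
        gate = F.softmax(self.router(h), dim=-1)   # per-node expert weights
        out = torch.stack([e(h) for e in self.experts], dim=-1)  # [N, dim, E]
        return (out * gate.unsqueeze(1)).sum(-1)   # weighted mixture per node

if __name__ == "__main__":
    n, d = 6, 16
    adj = torch.eye(n)                             # stand-in graph
    x = torch.randn(n, d)
    print(GraphMoE(d)(x, adj).shape)               # torch.Size([6, 16])
```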
arXiv:2408.10670 [cs.CV, eess.IV] https://arxiv.org/abs/2408.10670
A Noncontact Technique for Wave Measurement Based on Thermal Stereography and Deep Learning
Authors: Deyu Li, Longfei Xiao, Handi Wei, Yan Li, Binghua Zhang
Abstract: The accurate measurement of the wave field and its spatiotemporal evolution is essential in many hydrodynamic experiments and engineering applications. The binocular stereo imaging technique has been widely used to measure waves. However, the optical properties of indoor water surfaces, including transparency, specular reflection, and texture absence, pose challenges for image processing and stereo reconstruction. This study proposed a novel technique that combined thermal stereography and deep learning to achieve fully noncontact wave measurements. The optical imaging properties of water in the long-wave infrared spectrum were found to be suitable for stereo matching, effectively avoiding the issues in the visible-light spectrum. After capturing wave images using thermal stereo cameras, a reconstruction strategy involving deep learning techniques was proposed to improve stereo matching performance. A generative approach was employed to synthesize a dataset with ground-truth disparity from unannotated infrared images. This dataset was then fed to a pretrained stereo neural network for fine-tuning to achieve domain adaptation. Wave flume experiments were conducted to validate the feasibility and accuracy of the proposed technique. The final reconstruction results indicated good agreement and high accuracy, with a mean bias of less than 2.1% compared with the measurements obtained using wave probes, suggesting that the novel technique effectively measures the spatiotemporal distribution of the wave surface in hydrodynamic experiments.
Submitted 20 August, 2024; originally announced August 2024.

arXiv:2408.10269 [cs.LG, cs.AI, cs.CY] https://arxiv.org/abs/2408.10269
OpenCity: Open Spatio-Temporal Foundation Models for Traffic Prediction
Authors: Zhonghang Li, Long Xia, Lei Shi, Yong Xu, Dawei Yin, Chao Huang
Abstract: Accurate traffic forecasting is crucial for effective urban planning and transportation management, enabling efficient resource allocation and enhanced travel experiences. However, existing models often face limitations in generalization, struggling with zero-shot prediction on unseen regions and cities, as well as diminished long-term accuracy. This is primarily due to the inherent challenges in handling the spatial and temporal heterogeneity of traffic data, coupled with the significant distribution shift across time and space. In this work, we aim to unlock new possibilities for building versatile, resilient, and adaptive spatio-temporal foundation models for traffic prediction. To achieve this goal, we introduce a novel foundation model, named OpenCity, that can effectively capture and normalize the underlying spatio-temporal patterns from diverse data characteristics, facilitating zero-shot generalization across diverse urban environments. OpenCity integrates the Transformer architecture with graph neural networks to model the complex spatio-temporal dependencies in traffic data. By pre-training OpenCity on large-scale, heterogeneous traffic datasets, we enable the model to learn rich, generalizable representations that can be seamlessly applied to a wide range of traffic forecasting scenarios. Experimental results demonstrate that OpenCity exhibits exceptional zero-shot predictive performance. Moreover, OpenCity showcases promising scaling laws, suggesting the potential for developing a truly one-for-all traffic prediction solution that can adapt to new urban contexts with minimal overhead. We have made OpenCity open-source; it is available at https://github.com/HKUDS/OpenCity.
Submitted 16 August, 2024; originally announced August 2024.
Comments: 12 pages
arXiv:2408.08912 [cs.DL, cs.GR, cs.SI] https://arxiv.org/abs/2408.08912
GeneticPrism: Multifaceted Visualization of Scientific Impact Evolutions
Authors: Ye Sun, Zipeng Liu, Yuankai Luo, Lei Xia, Lei Shi
Abstract: Understanding the evolution of scholarly impact is essential for many real-life decision-making processes in academia, such as research planning, frontier exploration, and award selection. Popular platforms like Google Scholar and Web of Science rely on numerical indicators that are too abstract to convey the context and content of scientific impact, while most existing visualization approaches for mapping science do not consider the presentation of individual scholars' impact evolution using curated self-citation data. This paper builds on our previous work and proposes an integrated pipeline to visualize a scholar's impact evolution from multiple topic facets. A novel 3D prism-shaped visual metaphor is introduced as the overview of a scholar's impact, whilst their scientific evolution on each topic is displayed in a more structured manner. Additional designs, including a topic chord diagram, a streamgraph visualization, and an inter-topic flow map, optimized by an elaborate layout algorithm, assist in perceiving the scholar's scientific evolution across topics. A new six-degree-impact glyph metaphor highlights key interdisciplinary works driving the evolution. The proposed visualization methods are evaluated through case studies analyzing the careers of prestigious Turing Award laureates and a major visualization venue.
Submitted 14 August, 2024; originally announced August 2024.
Comments: 13 pages, 8 figures, excluding appendix. Submitted to TVCG on 2024-08-13

arXiv:2408.07852 [cs.CL, cs.AI, cs.LG] https://arxiv.org/abs/2408.07852
Training Language Models on the Knowledge Graph: Insights on Hallucinations and Their Detectability
Authors: Jiri Hron, Laura Culp, Gamaleldin Elsayed, Rosanne Liu, Ben Adlam, Maxwell Bileschi, Bernd Bohnet, JD Co-Reyes, Noah Fiedel, C. Daniel Freeman, Izzeddin Gur, Kathleen Kenealy, Jaehoon Lee, Peter J. Liu, Gaurav Mishra, Igor Mordatch, Azade Nova, Roman Novak, Aaron Parisi, Jeffrey Pennington, Alex Rizkowsky, Isabelle Simpson, Hanie Sedghi, Jascha Sohl-dickstein, Kevin Swersky, et al. (6 additional authors not shown)
Abstract: While many capabilities of language models (LMs) improve with increased training budget, the influence of scale on hallucinations is not yet fully understood. Hallucinations come in many forms, and there is no universally accepted definition. We thus focus on studying only those hallucinations where a correct answer appears verbatim in the training set. To fully control the training data content, we construct a knowledge graph (KG)-based dataset and use it to train a set of increasingly large LMs. We find that for a fixed dataset, larger and longer-trained LMs hallucinate less. However, hallucinating on $\leq 5$% of the training data requires an order of magnitude larger model, and thus an order of magnitude more compute, than Hoffmann et al. (2022) reported was optimal. Given this costliness, we study how hallucination detectors depend on scale. While detector size improves performance on a fixed LM's outputs, we find an inverse relationship between the scale of the LM and the detectability of its hallucinations.
Submitted 14 August, 2024; originally announced August 2024.
Comments: Published at COLM 2024. 16 pages, 11 figures
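Under the working definition stated above (a hallucination is counted only when the correct answer appeared verbatim in the training data), a minimal scorer can be written in a few lines; the example below is a toy stand-in for the paper's KG-based pipeline, not the authors' code.

```python
# Toy scorer in the spirit of the setup described in the abstract: count a
# completion as a hallucination when the prompt's correct answer appeared
# verbatim in training, yet the model's completion does not contain it.
from typing import Iterable, Tuple

def hallucination_rate(examples: Iterable[Tuple[str, str, str]],
                       training_corpus: str) -> float:
    """examples: (prompt, correct_answer, model_completion) triples."""
    scored, hallucinated = 0, 0
    for _prompt, answer, completion in examples:
        if answer not in training_corpus:
            continue                      # only count answers seen verbatim in training
        scored += 1
        if answer not in completion:
            hallucinated += 1
    return hallucinated / scored if scored else 0.0

if __name__ == "__main__":
    corpus = "Paris is the capital of France. The Nile flows through Egypt."
    examples = [
        ("Capital of France?", "Paris", "The capital is Paris."),
        ("River through Egypt?", "Nile", "It is the Amazon."),   # hallucination
        ("Capital of Peru?", "Lima", "Lima."),                   # answer not in corpus
    ]
    print(hallucination_rate(examples, corpus))                  # 0.5
```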
arXiv:2408.07820 [cs.NI, cs.IT, eess.SY] https://arxiv.org/abs/2408.07820
Hybrid Semantic/Bit Communication Based Networking Problem Optimization
Authors: Le Xia, Yao Sun, Dusit Niyato, Lan Zhang, Lei Zhang, Muhammad Ali Imran
Abstract: This paper jointly investigates user association (UA), mode selection (MS), and bandwidth allocation (BA) problems in a novel and practical next-generation cellular network in which two modes, semantic communication (SemCom) and conventional bit communication (BitCom), coexist, namely the hybrid semantic/bit communication network (HSB-Net). Concretely, we first identify a unified performance metric of message throughput for both SemCom and BitCom links. Next, we comprehensively develop a knowledge matching-aware two-stage tandem packet queuing model and theoretically derive the average packet loss ratio and queuing latency. Combined with several practical constraints, we then formulate a joint optimization problem for UA, MS, and BA to maximize the overall message throughput of HSB-Net. Afterward, we propose an optimal resource management strategy by employing a Lagrange primal-dual method and devising a preference list-based heuristic algorithm. Finally, numerical results validate the performance superiority of our proposed strategy compared with different benchmarks.
Submitted 19 August, 2024; v1 submitted 30 July, 2024; originally announced August 2024.
Comments: This paper has been accepted for publication and will be presented at the 2024 IEEE Global Communications Conference (GlobeCom 2024). arXiv admin note: substantial text overlap with arXiv:2404.04162
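The Lagrange primal-dual step is only named above; to show the general shape of such a method (and nothing specific to HSB-Net's UA/MS/BA formulation), the toy example below maximizes a concave throughput surrogate under a single bandwidth budget by dual subgradient ascent, using the closed-form primal solution b_i = max(0, 1/lam - 1).

```python
# Generic primal-dual illustration (toy, not the paper's model): maximize
# sum_i log(1 + b_i) subject to sum_i b_i <= B via dual subgradient ascent on
# the bandwidth constraint. Setting d/db_i [log(1 + b_i) - lam * b_i] = 0
# gives the closed-form primal step b_i = max(0, 1/lam - 1).
import math

def dual_ascent(n_users: int = 4, B: float = 10.0,
                step: float = 0.01, iters: int = 500):
    lam = 1.0
    for _ in range(iters):
        b = max(0.0, 1.0 / lam - 1.0)                    # primal step (same for all users here)
        alloc = [b] * n_users
        lam = max(1e-6, lam + step * (sum(alloc) - B))   # dual subgradient step
    return alloc, lam

if __name__ == "__main__":
    alloc, lam = dual_ascent()
    throughput = sum(math.log(1.0 + b) for b in alloc)
    print([round(a, 3) for a in alloc], round(lam, 3), round(throughput, 3))
    # allocations approach B / n_users = 2.5 each as the dual variable settles
```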
arXiv:2408.06543 [cs.CV, cs.AI] https://arxiv.org/abs/2408.06543
HDRGS: High Dynamic Range Gaussian Splatting
Authors: Jiahao Wu, Lu Xiao, Rui Peng, Kaiqiang Xiong, Ronggang Wang
Abstract: Recent years have witnessed substantial advancements in the field of 3D reconstruction from 2D images, particularly following the introduction of the neural radiance field (NeRF) technique. However, reconstructing a 3D high dynamic range (HDR) radiance field, which aligns more closely with real-world conditions, from 2D multi-exposure low dynamic range (LDR) images continues to pose significant challenges. Approaches to this issue fall into two categories: grid-based and implicit-based. Implicit methods, using multi-layer perceptrons (MLPs), face inefficiencies, limited solvability, and overfitting risks. Conversely, grid-based methods require significant memory and struggle with image quality and long training times. In this paper, we introduce Gaussian Splatting, a recent high-quality, real-time 3D reconstruction technique, into this domain. We further develop the High Dynamic Range Gaussian Splatting (HDR-GS) method, designed to address the aforementioned challenges. This method enhances color dimensionality by including luminance and uses an asymmetric grid for tone-mapping, swiftly and precisely converting pixel irradiance to color. Our approach improves HDR scene recovery accuracy and integrates a novel coarse-to-fine strategy to speed up model convergence, enhancing robustness against sparse viewpoints and exposure extremes, and preventing local optima. Extensive testing confirms that our method surpasses current state-of-the-art techniques in both synthetic and real-world scenarios.
Submitted 3 November, 2024; v1 submitted 12 August, 2024; originally announced August 2024.

arXiv:2408.06037 [cs.SE] https://arxiv.org/abs/2408.06037
Hyperion: Unveiling DApp Inconsistencies using LLM and Dataflow-Guided Symbolic Execution
Authors: Shuo Yang, Xingwei Lin, Jiachi Chen, Qingyuan Zhong, Lei Xiao, Renke Huang, Yanlin Wang, Zibin Zheng
Abstract: The rapid advancement of blockchain platforms has significantly accelerated the growth of decentralized applications (DApps). Similar to traditional applications, DApps integrate front-end descriptions that showcase their features to attract users, and back-end smart contracts for executing their business logic. However, inconsistencies between the features promoted in front-end descriptions and those actually implemented in the contract can confuse users and undermine DApps' trustworthiness. In this paper, we first conducted an empirical study to identify seven types of inconsistencies, each exemplified by a real-world DApp. Furthermore, we introduce HYPERION, an approach designed to automatically identify inconsistencies between front-end descriptions and back-end code implementations in DApps. This method leverages the fine-tuned large language model LLaMA2 to analyze DApp descriptions and employs dataflow-guided symbolic execution for contract bytecode analysis. Finally, HYPERION reports inconsistencies based on predefined detection patterns. Experiments on our ground-truth dataset consisting of 54 DApps show that HYPERION reaches 84.06% overall recall and 92.06% overall precision in reporting DApp inconsistencies. We also applied HYPERION to analyze 835 real-world DApps, and the experimental results show that HYPERION discovers 459 real-world DApps containing at least one inconsistency.
Submitted 12 August, 2024; originally announced August 2024.
Comments: Accepted by ICSE 2025
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.00767v1-abstract-full').style.display = 'none'; document.getElementById('2408.00767v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICCT 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.20530">arXiv:2407.20530</a> <span> [<a href="https://arxiv.org/pdf/2407.20530">pdf</a>, <a href="https://arxiv.org/format/2407.20530">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> SuperCodec: A Neural Speech Codec with Selective Back-Projection Network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Youqiang Zheng</a>, <a href="/search/cs?searchtype=author&query=Tu%2C+W">Weiping Tu</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+L">Li Xiao</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+X">Xinmeng Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.20530v1-abstract-short" style="display: inline;"> Neural speech coding is a rapidly developing topic, where state-of-the-art approaches now exhibit superior compression performance than conventional methods. Despite significant progress, existing methods still have limitations in preserving and reconstructing fine details for optimal reconstruction, especially at low bitrates. In this study, we introduce SuperCodec, a neural speech codec that ach… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.20530v1-abstract-full').style.display = 'inline'; document.getElementById('2407.20530v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.20530v1-abstract-full" style="display: none;"> Neural speech coding is a rapidly developing topic, where state-of-the-art approaches now exhibit superior compression performance than conventional methods. Despite significant progress, existing methods still have limitations in preserving and reconstructing fine details for optimal reconstruction, especially at low bitrates. In this study, we introduce SuperCodec, a neural speech codec that achieves state-of-the-art performance at low bitrates. It employs a novel back projection method with selective feature fusion for augmented representation. Specifically, we propose to use Selective Up-sampling Back Projection (SUBP) and Selective Down-sampling Back Projection (SDBP) modules to replace the standard up- and down-sampling layers at the encoder and decoder, respectively. Experimental results show that our method outperforms the existing neural speech codecs operating at various bitrates. 
arXiv:2407.20530 [pdf, other] https://arxiv.org/abs/2407.20530
Subjects: cs.SD (Sound); eess.AS (Audio and Speech Processing)
Title: SuperCodec: A Neural Speech Codec with Selective Back-Projection Network
Authors: Youqiang Zheng, Weiping Tu, Li Xiao, Xinmeng Xu
Abstract: Neural speech coding is a rapidly developing topic, where state-of-the-art approaches now deliver compression performance superior to conventional methods. Despite significant progress, existing methods still have limitations in preserving and reconstructing fine details for optimal reconstruction, especially at low bitrates. In this study, we introduce SuperCodec, a neural speech codec that achieves state-of-the-art performance at low bitrates. It employs a novel back-projection method with selective feature fusion for augmented representation. Specifically, we propose Selective Up-sampling Back Projection (SUBP) and Selective Down-sampling Back Projection (SDBP) modules to replace the standard up- and down-sampling layers at the encoder and decoder, respectively. Experimental results show that our method outperforms existing neural speech codecs operating at various bitrates; in particular, it achieves higher-quality reconstructed speech at 1 kbps than Lyra V2 at 3.2 kbps and Encodec at 6 kbps.
Submitted 30 July, 2024; originally announced July 2024.
Comments: Accepted by ICASSP 2024

arXiv:2407.19674 [pdf, other] https://arxiv.org/abs/2407.19674
Subjects: cs.CV (Computer Vision and Pattern Recognition)
DOI: 10.1145/3664647.3680953
Title: Advancing Prompt Learning through an External Layer
Authors: Fangming Cui, Xun Yang, Chao Wu, Liang Xiao, Xinmei Tian
Abstract: Prompt learning represents a promising method for adapting pre-trained vision-language models (VLMs) to various downstream tasks by learning a set of text embeddings. One challenge inherent to these methods is poor generalization performance due to the invalidity of the learned text embeddings for unseen tasks. A straightforward approach to bridge this gap is to freeze the text embeddings in prompts, but this results in a lack of capacity to adapt VLMs for downstream tasks. To address this dilemma, we propose a paradigm called EnPrompt with a novel External Layer (EnLa). Specifically, we propose a textual external layer and learnable visual embeddings for adapting VLMs to downstream tasks. The learnable external layer is built upon valid embeddings of pre-trained CLIP; this design balances the learning capabilities of the two branches. To align the textual and visual features, we propose a two-pronged approach: i) we introduce optimal transport as the discrepancy metric to align the vision and text modalities, and ii) we introduce a novel strengthening feature to enhance the interaction between the two modalities. Four representative experiments (base-to-novel generalization, few-shot learning, cross-dataset generalization, and domain-shift generalization) across 15 datasets demonstrate that our method outperforms existing prompt learning methods.
Submitted 15 November, 2024; v1 submitted 28 July, 2024; originally announced July 2024.
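Optimal transport, used above as the discrepancy metric between vision and text modalities, is commonly approximated with entropy-regularized Sinkhorn iterations. The sketch below computes a generic Sinkhorn alignment cost between two small feature sets; the squared-Euclidean cost, the regularization strength eps, and the iteration count are assumptions, and this is not the paper's implementation.

```python
import numpy as np

def sinkhorn_ot(X, Y, eps=0.1, n_iter=200):
    """Entropy-regularized optimal transport cost between feature sets X (n, d) and Y (m, d)."""
    n, m = X.shape[0], Y.shape[0]
    C = ((X[:, None, :] - Y[None, :, :]) ** 2).sum(-1)   # squared-Euclidean cost matrix
    K = np.exp(-C / eps)                                  # Gibbs kernel
    a, b = np.full(n, 1.0 / n), np.full(m, 1.0 / m)       # uniform marginals
    u, v = np.ones(n), np.ones(m)
    for _ in range(n_iter):                               # Sinkhorn scaling iterations
        u = a / (K @ v)
        v = b / (K.T @ u)
    P = u[:, None] * K * v[None, :]                       # transport plan
    return float((P * C).sum())                           # approximate OT alignment cost

rng = np.random.default_rng(0)
text_feats = rng.normal(size=(4, 8))     # stand-in for per-token text-side features
visual_feats = rng.normal(size=(6, 8))   # stand-in for per-patch vision-side features
# Unit-normalize, as is typical for CLIP-style features, to keep the kernel well scaled.
text_feats /= np.linalg.norm(text_feats, axis=1, keepdims=True)
visual_feats /= np.linalg.norm(visual_feats, axis=1, keepdims=True)
print("OT alignment cost:", round(sinkhorn_ot(text_feats, visual_feats), 4))
```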
arXiv:2407.15173 [pdf, other] https://arxiv.org/abs/2407.15173
Subjects: cs.CV (Computer Vision and Pattern Recognition)
Title: Rethinking Domain Adaptation and Generalization in the Era of CLIP
Authors: Ruoyu Feng, Tao Yu, Xin Jin, Xiaoyuan Yu, Lei Xiao, Zhibo Chen
Abstract: In recent studies on domain adaptation, significant emphasis has been placed on learning shared knowledge from a source domain to a target domain. Recently, the large vision-language pre-trained model CLIP has shown strong zero-shot recognition ability, and parameter-efficient tuning can further improve its performance on specific tasks. This work demonstrates that a simple domain prior boosts CLIP's zero-shot recognition in a specific domain. Besides, CLIP's adaptation relies less on source domain data due to its diverse pre-training dataset. Furthermore, we create a benchmark for zero-shot adaptation and pseudo-labeling-based self-training with CLIP. Last but not least, we propose to improve the task generalization ability of CLIP from multiple unlabeled domains, which is a more practical and unique scenario. We believe our findings motivate a rethinking of domain adaptation benchmarks and the associated role of related algorithms in the era of CLIP.
Submitted 21 July, 2024; originally announced July 2024.
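A domain prior of the kind the abstract above refers to can be as simple as injecting the domain name into the zero-shot prompt template before text encoding. In the sketch below, encode_text is a hypothetical stand-in for a CLIP-like encoder, and the prompt template is an assumption; the point is only to show where the prior enters.

```python
import numpy as np

def encode_text(prompt):
    """Hypothetical stand-in for a CLIP-like text encoder (returns a unit vector)."""
    rng = np.random.default_rng(abs(hash(prompt)) % (2**32))
    v = rng.normal(size=64)
    return v / np.linalg.norm(v)

def zero_shot_classify(image_feat, class_names, domain=None):
    """Cosine-similarity zero-shot classification, optionally with a domain prior in the prompt."""
    template = "a {d} photo of a {c}" if domain else "a photo of a {c}"
    prompts = [template.format(d=domain, c=c) for c in class_names]
    text_feats = np.stack([encode_text(p) for p in prompts])
    scores = text_feats @ image_feat              # image_feat assumed unit-normalized
    return class_names[int(np.argmax(scores))]

# Toy usage: the "image" feature is mocked with the same encoder, so the domain-aware
# prompt matches it exactly while the plain prompt does not.
image_feat = encode_text("a sketch photo of a dog")
print(zero_shot_classify(image_feat, ["dog", "cat", "car"], domain="sketch"))  # -> dog
print(zero_shot_classify(image_feat, ["dog", "cat", "car"]))                   # may be arbitrary
```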
arXiv:2407.14742 [pdf, other] https://arxiv.org/abs/2407.14742
Subjects: cs.HC (Human-Computer Interaction)
Title: Dynamic Color Assignment for Hierarchical Data
Authors: Jiashu Chen, Weikai Yang, Zelin Jia, Lanxi Xiao, Shixia Liu
Abstract: Assigning discriminable and harmonic colors to samples according to their class labels and spatial distribution can generate attractive visualizations and facilitate data exploration. However, as the number of classes increases, it is challenging to generate a high-quality color assignment result that accommodates all classes simultaneously. A practical solution is to organize classes into a hierarchy and then dynamically assign colors during exploration. However, existing color assignment methods fall short in generating high-quality color assignment results and dynamically aligning them with hierarchical structures. To address this issue, we develop a dynamic color assignment method for hierarchical data, which is formulated as a multi-objective optimization problem. This method simultaneously considers color discriminability, color harmony, and spatial distribution at each hierarchical level. By using the colors of parent classes to guide the color assignment of their child classes, our method further promotes both consistency and clarity across hierarchical levels. We demonstrate the effectiveness of our method in generating dynamic color assignment results with quantitative experiments and a user study.
Submitted 3 September, 2024; v1 submitted 19 July, 2024; originally announced July 2024.
Comments: Accepted by IEEE VIS 2024. This version fixes the email address and co-author information.
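The parent-guided assignment described above can be approximated with a simple heuristic: place each child class at a hue near its parent's hue, spaced enough to stay discriminable. The sketch below uses HSV hue offsets for this; the hue_spread value and the heuristic itself are illustrative assumptions, not the paper's multi-objective optimization.

```python
import colorsys

def assign_child_colors(parent_rgb, n_children, hue_spread=0.12):
    """Spread children around the parent's hue so siblings stay discriminable
    while the family remains visually consistent (illustrative heuristic)."""
    h, s, v = colorsys.rgb_to_hsv(*parent_rgb)
    colors = []
    for i in range(n_children):
        # Offsets symmetric around the parent hue: -hue_spread ... +hue_spread.
        offset = 0.0 if n_children == 1 else (i / (n_children - 1) - 0.5) * 2 * hue_spread
        colors.append(colorsys.hsv_to_rgb((h + offset) % 1.0, s, v))
    return colors

# Usage: a blue parent class with three child classes.
parent = (0.2, 0.4, 0.9)
for c in assign_child_colors(parent, 3):
    print(tuple(round(x, 3) for x in c))
```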
arXiv:2407.12565 [pdf, other] https://arxiv.org/abs/2407.12565
Subjects: cs.AR (Hardware Architecture)
Title: SigDLA: A Deep Learning Accelerator Extension for Signal Processing
Authors: Fangfa Fu, Wenyu Zhang, Zesong Jiang, Zhiyu Zhu, Guoyu Li, Bing Yang, Cheng Liu, Liyi Xiao, Jinxiang Wang, Huawei Li, Xiaowei Li
Abstract: Deep learning and signal processing are closely correlated in many IoT scenarios, such as anomaly detection, to empower intelligence of things. Many IoT processors utilize digital signal processors (DSPs) for signal processing and build deep learning frameworks on this basis. While deep learning is usually much more computing-intensive than signal processing, the computing efficiency of deep learning on DSPs is limited due to the lack of native hardware support. In this case, we present a contrary strategy and propose to enable signal processing on top of a classical deep learning accelerator (DLA). With the observation that irregular data patterns, such as butterfly operations in FFT, are the major barrier hindering the deployment of signal processing on DLAs, we propose a programmable data shuffling fabric inserted between the input buffer and the computing array of the DLA, so that irregular data is reorganized and the processing becomes regular. With this online data shuffling, the proposed architecture, SigDLA, can adapt to various signal processing tasks without affecting the deep learning processing. Moreover, we build a reconfigurable computing array to suit the various data-width requirements of both signal processing and deep learning. According to our experiments, SigDLA achieves an average performance speedup of 4.4$\times$, 1.4$\times$, and 1.52$\times$, and an average energy reduction of 4.82$\times$, 3.27$\times$, and 2.15$\times$ compared to an embedded ARM processor with customized DSP instructions, a DSP processor, and an independent DSP-DLA architecture, respectively, with 17% more chip area than the original DLAs.
Submitted 17 July, 2024; originally announced July 2024.
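The irregular FFT access pattern that the shuffling fabric targets is exemplified by the bit-reversal reordering performed before an iterative radix-2 FFT: once inputs are permuted, every butterfly stage reads regular strides. Below is a small software sketch of that reordering (an illustration of the access pattern, not the hardware fabric):

```python
import numpy as np

def bit_reverse_permutation(n):
    """Index permutation used before an iterative radix-2 FFT (n must be a power of two)."""
    bits = n.bit_length() - 1
    perm = [0] * n
    for i in range(n):
        rev, x = 0, i
        for _ in range(bits):
            rev = (rev << 1) | (x & 1)  # reverse the bit pattern of i
            x >>= 1
        perm[i] = rev
    return perm

# After this shuffle, each butterfly stage combines elements at regular strides,
# which is the kind of regular access a systolic computing array prefers.
x = np.arange(8)
perm = bit_reverse_permutation(8)
print(x[perm])   # [0 4 2 6 1 5 3 7]
```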
arXiv:2407.09157 [pdf, other] https://arxiv.org/abs/2407.09157
Subjects: cs.IR (Information Retrieval); cs.AI (Artificial Intelligence); cs.LG (Machine Learning)
Title: Movie Recommendation with Poster Attention via Multi-modal Transformer Feature Fusion
Authors: Linhan Xia, Yicheng Yang, Ziou Chen, Zheng Yang, Shengxin Zhu
Abstract: Pre-trained models learn general representations from large datasets and can be fine-tuned for specific tasks to significantly reduce training time. Pre-trained models such as generative pre-trained transformers (GPT), bidirectional encoder representations from transformers (BERT), and vision transformers (ViT) have become a cornerstone of current research in machine learning. This study proposes a multi-modal movie recommendation system that extracts features from each movie's designed poster and its narrative text description. The system uses the BERT model to extract information from the text modality, the ViT model to extract information from the poster/image modality, and a Transformer architecture to fuse the features of all modalities and predict users' preferences. Integrating pre-trained foundation models with smaller downstream datasets captures multi-modal content features in a more comprehensive manner, thereby providing more accurate recommendations. The efficiency of the proof-of-concept model is verified on the standard MovieLens 100K and 1M benchmark datasets. The prediction accuracy of user ratings improves over the baseline algorithm, demonstrating the potential of this cross-modal algorithm for movie and video recommendation.
Submitted 12 July, 2024; originally announced July 2024.
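As a rough stand-in for the fusion step described above, the sketch below scores a user against a movie by concatenating hypothetical pre-extracted text and poster features and projecting them through a single untrained fusion layer; the paper fuses modalities with a Transformer, so the layer, dimensions, and weights here are purely illustrative.

```python
import numpy as np

rng = np.random.default_rng(0)

# Hypothetical pre-extracted features (in practice: BERT for descriptions, ViT for posters).
text_feat = rng.normal(size=768)     # description embedding
poster_feat = rng.normal(size=768)   # poster embedding
user_feat = rng.normal(size=64)      # learned user embedding

# Simple late-fusion stand-in for the paper's Transformer fusion module:
# project the concatenated modalities into the user space and take a dot product.
W_fuse = rng.normal(size=(64, 768 * 2)) * 0.01   # illustrative, untrained weights
movie_feat = np.tanh(W_fuse @ np.concatenate([text_feat, poster_feat]))

predicted_rating = float(user_feat @ movie_feat)  # higher = stronger predicted preference
print(round(predicted_rating, 3))
```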
arXiv:2407.08138 [pdf, other] https://arxiv.org/abs/2407.08138
Subjects: cs.SE (Software Engineering); ACM Class: D.2.5
Title: How Do Developers Structure Unit Test Cases? An Empirical Study from the "AAA" Perspective
Authors: Chenhao Wei, Lu Xiao, Tingting Yu, Sunny Wong, Abigail Clune
Abstract: The AAA pattern, i.e., arrange, act, and assert, provides a unified structure for unit test cases, which benefits comprehension and maintenance. However, there is little understanding of whether and how commonly real-life developers structure unit test cases following AAA in practice. In particular, are there recurring anti-patterns that deviate from the AAA structure and merit refactoring? And, if test cases follow the AAA structure, could they contain design flaws in the A blocks? If we propose refactoring to fix the design of test cases following AAA, how do developers receive the proposals? Do they favor refactoring? If not, what are their considerations? This study presents an empirical study on 435 real-life unit test cases randomly selected from four open-source projects. Overall, the majority (71.5%) of test cases follow the AAA structure. We observed three recurring anti-patterns that deviate from the AAA structure, as well as four design flaws that may reside inside the A blocks. Each issue type has its drawbacks and merits a corresponding refactoring resolution. We sent a total of 18 refactoring proposals as issue tickets for fixing these problems and received 78% positive feedback favoring the refactoring. From the rejections, we learned that return on investment is a key consideration for developers. The findings provide insights for practitioners to structure unit test cases with AAA in mind, and for researchers to develop related techniques for enforcing AAA in test cases.
Submitted 10 July, 2024; originally announced July 2024.
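For reference, a unit test following the AAA structure keeps its arrange, act, and assert steps in clearly separated blocks, as in this small Python unittest example (the Cart class is a hypothetical subject under test):

```python
import unittest

class Cart:
    """Hypothetical subject under test."""
    def __init__(self):
        self.items = []
    def add(self, name, price):
        self.items.append((name, price))
    def total(self):
        return sum(price for _, price in self.items)

class TestCartTotal(unittest.TestCase):
    def test_total_sums_item_prices(self):
        # Arrange: set up the object and its inputs.
        cart = Cart()
        cart.add("book", 12.0)
        cart.add("pen", 3.0)
        # Act: invoke the behavior under test exactly once.
        total = cart.total()
        # Assert: verify the observable outcome.
        self.assertEqual(total, 15.0)

if __name__ == "__main__":
    unittest.main()
```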
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> D.2.5 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.07397">arXiv:2407.07397</a> <span> [<a href="https://arxiv.org/pdf/2407.07397">pdf</a>, <a href="https://arxiv.org/format/2407.07397">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> SimuSOE: A Simulated Snoring Dataset for Obstructive Sleep Apnea-Hypopnea Syndrome Evaluation during Wakefulness </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lin%2C+J">Jie Lin</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+X">Xiuping Yang</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+L">Li Xiao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xinhong Li</a>, <a href="/search/cs?searchtype=author&query=Yi%2C+W">Weiyan Yi</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yuhong Yang</a>, <a href="/search/cs?searchtype=author&query=Tu%2C+W">Weiping Tu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xiong Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.07397v1-abstract-short" style="display: inline;"> Obstructive Sleep Apnea-Hypopnea Syndrome (OSAHS) is a prevalent chronic breathing disorder caused by upper airway obstruction. Previous studies advanced OSAHS evaluation through machine learning-based systems trained on sleep snoring or speech signal datasets. However, constructing datasets for training a precise and rapid OSAHS evaluation system poses a challenge, since 1) it is time-consuming t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.07397v1-abstract-full').style.display = 'inline'; document.getElementById('2407.07397v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.07397v1-abstract-full" style="display: none;"> Obstructive Sleep Apnea-Hypopnea Syndrome (OSAHS) is a prevalent chronic breathing disorder caused by upper airway obstruction. Previous studies advanced OSAHS evaluation through machine learning-based systems trained on sleep snoring or speech signal datasets. However, constructing datasets for training a precise and rapid OSAHS evaluation system poses a challenge, since 1) it is time-consuming to collect sleep snores and 2) the speech signal is limited in reflecting upper airway obstruction. In this paper, we propose a new snoring dataset for OSAHS evaluation, named SimuSOE, in which a novel and time-effective snoring collection method is introduced for tackling the above problems. In particular, we adopt simulated snoring which is a type of snore intentionally emitted by patients to replace natural snoring. Experimental results indicate that the simulated snoring signal during wakefulness can serve as an effective feature in OSAHS preliminary screening. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.07397v1-abstract-full').style.display = 'none'; document.getElementById('2407.07397v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.05872">arXiv:2407.05872</a> <span> [<a href="https://arxiv.org/pdf/2407.05872">pdf</a>, <a href="https://arxiv.org/format/2407.05872">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Scaling Exponents Across Parameterizations and Optimizers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Everett%2C+K">Katie Everett</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+L">Lechao Xiao</a>, <a href="/search/cs?searchtype=author&query=Wortsman%2C+M">Mitchell Wortsman</a>, <a href="/search/cs?searchtype=author&query=Alemi%2C+A+A">Alexander A. Alemi</a>, <a href="/search/cs?searchtype=author&query=Novak%2C+R">Roman Novak</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+P+J">Peter J. Liu</a>, <a href="/search/cs?searchtype=author&query=Gur%2C+I">Izzeddin Gur</a>, <a href="/search/cs?searchtype=author&query=Sohl-Dickstein%2C+J">Jascha Sohl-Dickstein</a>, <a href="/search/cs?searchtype=author&query=Kaelbling%2C+L+P">Leslie Pack Kaelbling</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+J">Jaehoon Lee</a>, <a href="/search/cs?searchtype=author&query=Pennington%2C+J">Jeffrey Pennington</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.05872v2-abstract-short" style="display: inline;"> Robust and effective scaling of models from small to large width typically requires the precise adjustment of many algorithmic and architectural details, such as parameterization and optimizer choices. In this work, we propose a new perspective on parameterization by investigating a key assumption in prior work about the alignment between parameters and data and derive new theoretical results unde… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.05872v2-abstract-full').style.display = 'inline'; document.getElementById('2407.05872v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.05872v2-abstract-full" style="display: none;"> Robust and effective scaling of models from small to large width typically requires the precise adjustment of many algorithmic and architectural details, such as parameterization and optimizer choices. In this work, we propose a new perspective on parameterization by investigating a key assumption in prior work about the alignment between parameters and data and derive new theoretical results under weaker assumptions and a broader set of optimizers. 
arXiv:2407.04358 [pdf, other] https://arxiv.org/abs/2407.04358
Subjects: math.OC (Optimization and Control); cs.LG (Machine Learning)
Title: An Adaptive Stochastic Gradient Method with Non-negative Gauss-Newton Stepsizes
Authors: Antonio Orvieto, Lin Xiao
Abstract: We consider the problem of minimizing the average of a large number of smooth but possibly non-convex functions. In the context of most machine learning applications, each loss function is non-negative and thus can be expressed as the composition of a square and its real-valued square root. This reformulation allows us to apply the Gauss-Newton method, or the Levenberg-Marquardt method when adding a quadratic regularization. The resulting algorithm, while being computationally as efficient as the vanilla stochastic gradient method, is highly adaptive and can automatically warm up and decay the effective stepsize while tracking the non-negative loss landscape. We provide a tight convergence analysis, leveraging new techniques, in the stochastic convex and non-convex settings. In particular, in the convex case, the method does not require access to the gradient Lipschitz constant for convergence and is guaranteed to never diverge. The convergence rates and empirical evaluations compare favorably to the classical (stochastic) gradient method as well as to several other adaptive methods.
Submitted 5 July, 2024; originally announced July 2024.
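Writing each non-negative loss as f(x) = r(x)^2 with r = sqrt(f) and taking one regularized Gauss-Newton step on the linearization of r yields an SGD-like update whose effective stepsize, gamma / (1 + gamma * ||grad f||^2 / (2 f)), automatically shrinks when the gradient is large relative to the loss. The sketch below implements that stepsize rule; the derivation is one reading of the abstract, so the paper's exact formulation and constants may differ.

```python
import numpy as np

def ngn_step(x, f_val, grad, gamma=0.5):
    """One Gauss-Newton-type step for a non-negative loss f = r^2, r = sqrt(f).
    The effective stepsize gamma / (1 + gamma * ||grad||^2 / (2 f)) comes from
    linearizing r and solving the regularized least-squares subproblem (sketch)."""
    denom = 1.0 + gamma * float(grad @ grad) / (2.0 * max(f_val, 1e-12))  # guard f > 0
    return x - (gamma / denom) * grad

# Usage: minimize f(x) = ||x||^2 / 2 (non-negative), whose gradient is x.
x = np.array([3.0, -4.0])
for _ in range(50):
    f_val = 0.5 * float(x @ x)
    x = ngn_step(x, f_val, x, gamma=0.5)
print(np.round(x, 4))
```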