CINXE.COM

Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script 
src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. 
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" 
role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 98 results for author: <span class="mathjax">Gu, B</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Gu%2C+B">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Gu, B"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Gu%2C+B&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option 
value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Gu, B"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Gu%2C+B&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Gu%2C+B&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Gu%2C+B&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10898">arXiv:2411.10898</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.10898">pdf</a>, <a href="https://arxiv.org/format/2411.10898">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Watermarking Generative Categorical Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a 
href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bochao Gu</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+H">Hengzhi He</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+G">Guang Cheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.10898v1-abstract-short" style="display: inline;"> In this paper, we propose a novel statistical framework for watermarking generative categorical data. Our method systematically embeds pre-agreed secret signals by splitting the data distribution into two components and modifying one distribution based on a deterministic relationship with the other, ensuring the watermark is embedded at the distribution-level. To verify the watermark, we introduce&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10898v1-abstract-full').style.display = 'inline'; document.getElementById('2411.10898v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.10898v1-abstract-full" style="display: none;"> In this paper, we propose a novel statistical framework for watermarking generative categorical data. Our method systematically embeds pre-agreed secret signals by splitting the data distribution into two components and modifying one distribution based on a deterministic relationship with the other, ensuring the watermark is embedded at the distribution-level. To verify the watermark, we introduce an insertion inverse algorithm and detect its presence by measuring the total variation distance between the inverse-decoded data and the original distribution. 
Unlike previous categorical watermarking methods, which primarily focus on embedding watermarks into a given dataset, our approach operates at the distribution-level, allowing for verification from a statistical distributional perspective. This makes it particularly well-suited for the modern paradigm of synthetic data generation, where the underlying data distribution, rather than specific data points, is of primary importance. The effectiveness of our method is demonstrated through both theoretical analysis and empirical validation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10898v1-abstract-full').style.display = 'none'; document.getElementById('2411.10898v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.17040">arXiv:2410.17040</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.17040">pdf</a>, <a href="https://arxiv.org/format/2410.17040">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Arabic Dataset for LLM Safeguard Evaluation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ashraf%2C+Y">Yasser Ashraf</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yuxia Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bin Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Nakov%2C+P">Preslav Nakov</a>, <a href="/search/cs?searchtype=author&amp;query=Baldwin%2C+T">Timothy Baldwin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.17040v1-abstract-short" style="display: inline;"> The growing use of large language models (LLMs) has raised concerns regarding their safety. While many studies have focused on English, the safety of LLMs in Arabic, with its linguistic and cultural complexities, remains under-explored. Here, we aim to bridge this gap. 
In particular, we present an Arab-region-specific safety evaluation dataset consisting of 5,799 questions, including direct attack&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.17040v1-abstract-full').style.display = 'inline'; document.getElementById('2410.17040v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.17040v1-abstract-full" style="display: none;"> The growing use of large language models (LLMs) has raised concerns regarding their safety. While many studies have focused on English, the safety of LLMs in Arabic, with its linguistic and cultural complexities, remains under-explored. Here, we aim to bridge this gap. In particular, we present an Arab-region-specific safety evaluation dataset consisting of 5,799 questions, including direct attacks, indirect attacks, and harmless requests with sensitive words, adapted to reflect the socio-cultural context of the Arab world. To uncover the impact of different stances in handling sensitive and controversial topics, we propose a dual-perspective evaluation framework. It assesses the LLM responses from both governmental and opposition viewpoints. Experiments over five leading Arabic-centric and multilingual LLMs reveal substantial disparities in their safety performance. This reinforces the need for culturally specific datasets to ensure the responsible deployment of LLMs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.17040v1-abstract-full').style.display = 'none'; document.getElementById('2410.17040v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 6 figures, 10 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.02559">arXiv:2410.02559</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.02559">pdf</a>, <a href="https://arxiv.org/format/2410.02559">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1162/neco_a_01636">10.1162/neco_a_01636 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Obtaining Lower Query Complexities through Lightweight Zeroth-Order Proximal Gradient Algorithms </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bin Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Wei%2C+X">Xiyuan Wei</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+H">Hualin Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Chang%2C+Y">Yi Chang</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+H">Heng Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.02559v1-abstract-short" style="display: inline;"> Zeroth-order (ZO) optimization is one key technique for machine learning problems where gradient calculation is 
expensive or impossible. Several variance reduced ZO proximal algorithms have been proposed to speed up ZO optimization for non-smooth problems, and all of them opted for the coordinated ZO estimator against the random ZO estimator when approximating the true gradient, since the former i&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02559v1-abstract-full').style.display = 'inline'; document.getElementById('2410.02559v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.02559v1-abstract-full" style="display: none;"> Zeroth-order (ZO) optimization is one key technique for machine learning problems where gradient calculation is expensive or impossible. Several variance reduced ZO proximal algorithms have been proposed to speed up ZO optimization for non-smooth problems, and all of them opted for the coordinated ZO estimator against the random ZO estimator when approximating the true gradient, since the former is more accurate. While the random ZO estimator introduces bigger error and makes convergence analysis more challenging compared to coordinated ZO estimator, it requires only $\mathcal{O}(1)$ computation, which is significantly less than $\mathcal{O}(d)$ computation of the coordinated ZO estimator, with $d$ being dimension of the problem space. To take advantage of the computationally efficient nature of the random ZO estimator, we first propose a ZO objective decrease (ZOOD) property which can incorporate two different types of errors in the upper bound of convergence rate. Next, we propose two generic reduction frameworks for ZO optimization which can automatically derive the convergence results for convex and non-convex problems respectively, as long as the convergence rate for the inner solver satisfies the ZOOD property. 
With the application of two reduction frameworks on our proposed ZOR-ProxSVRG and ZOR-ProxSAGA, two variance reduced ZO proximal algorithms with fully random ZO estimators, we improve the state-of-the-art function query complexities from $\mathcal{O}\left(\min\{\frac{dn^{1/2}}{\epsilon^2}, \frac{d}{\epsilon^3}\}\right)$ to $\tilde{\mathcal{O}}\left(\frac{n+d}{\epsilon^2}\right)$ under $d &gt; n^{\frac{1}{2}}$ for non-convex problems, and from $\mathcal{O}\left(\frac{d}{\epsilon^2}\right)$ to $\tilde{\mathcal{O}}\left(n\log\frac{1}{\epsilon}+\frac{d}{\epsilon}\right)$ for convex problems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02559v1-abstract-full').style.display = 'none'; document.getElementById('2410.02559v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Neural Computation 36 (5), 897-935</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Neural Computation, 2024, 36(5): 897-935 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.00459">arXiv:2409.00459</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.00459">pdf</a>, <a href="https://arxiv.org/ps/2409.00459">ps</a>, <a href="https://arxiv.org/format/2409.00459">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Gradient-Free Method for Heavily Constrained Nonconvex Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Wanli Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Gao%2C+H">Hongchang Gao</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bin Gu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.00459v1-abstract-short" style="display: inline;"> Zeroth-order (ZO) method has been shown to be a powerful method for solving the optimization problem where explicit expression of the gradients is difficult or infeasible to obtain. 
Recently, due to the practical value of the constrained problems, a lot of ZO Frank-Wolfe or projected ZO methods have been proposed. However, in many applications, we may have a very large number of nonconvex white/bl&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.00459v1-abstract-full').style.display = 'inline'; document.getElementById('2409.00459v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.00459v1-abstract-full" style="display: none;"> Zeroth-order (ZO) method has been shown to be a powerful method for solving the optimization problem where explicit expression of the gradients is difficult or infeasible to obtain. Recently, due to the practical value of the constrained problems, a lot of ZO Frank-Wolfe or projected ZO methods have been proposed. However, in many applications, we may have a very large number of nonconvex white/black-box constraints, which makes the existing zeroth-order methods extremely inefficient (or even not working) since they need to inquire function value of all the constraints and project the solution to the complicated feasible set. In this paper, to solve the nonconvex problem with a large number of white/black-box constraints, we proposed a doubly stochastic zeroth-order gradient method (DSZOG) with momentum method and adaptive step size. Theoretically, we prove DSZOG can converge to the $\epsilon$-stationary point of the constrained problem. Experimental results in two applications demonstrate the superiority of our method in terms of training time and accuracy compared with other ZO methods for the constrained problem. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.00459v1-abstract-full').style.display = 'none'; document.getElementById('2409.00459v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">21 page, 12 figures, conference</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> International Conference on Machine Learning. PMLR, 2022: 19935-19955 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.11316">arXiv:2408.11316</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.11316">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Probabilistic Medical Predictions of Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bowen Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Desai%2C+R+J">Rishi J. 
Desai</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+K+J">Kueiyu Joshua Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jie Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.11316v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) have demonstrated significant potential in clinical applications through prompt engineering, which enables the generation of flexible and diverse clinical predictions. However, they pose challenges in producing prediction probabilities, which are essential for transparency and allowing clinicians to apply flexible probability thresholds in decision-making. While explic&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.11316v1-abstract-full').style.display = 'inline'; document.getElementById('2408.11316v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.11316v1-abstract-full" style="display: none;"> Large Language Models (LLMs) have demonstrated significant potential in clinical applications through prompt engineering, which enables the generation of flexible and diverse clinical predictions. However, they pose challenges in producing prediction probabilities, which are essential for transparency and allowing clinicians to apply flexible probability thresholds in decision-making. While explicit prompt instructions can lead LLMs to provide prediction probability numbers through text generation, LLMs&#39; limitations in numerical reasoning raise concerns about the reliability of these text-generated probabilities. To assess this reliability, we compared explicit probabilities derived from text generation to implicit probabilities calculated based on the likelihood of predicting the correct label token. 
Experimenting with six advanced open-source LLMs across five medical datasets, we found that the performance of explicit probabilities was consistently lower than implicit probabilities with respect to discrimination, precision, and recall. Moreover, these differences were enlarged on small LLMs and imbalanced datasets, emphasizing the need for cautious interpretation and applications, as well as further research into robust probability estimation methods for LLMs in clinical contexts. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.11316v1-abstract-full').style.display = 'none'; document.getElementById('2408.11316v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">58 pages, 3 figures, 3 tables, Submitted to Nature Communication</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.13076">arXiv:2407.13076</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.13076">pdf</a>, <a href="https://arxiv.org/format/2407.13076">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Matching-Driven Deep Reinforcement Learning for Energy-Efficient Transmission Parameter Allocation in 
Multi-Gateway LoRa Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lin%2C+Z">Ziqi Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+X">Xu Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Gong%2C+S">Shimin Gong</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+L">Lanhua Li</a>, <a href="/search/cs?searchtype=author&amp;query=Su%2C+Z">Zhou Su</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bo Gu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.13076v1-abstract-short" style="display: inline;"> Long-range (LoRa) communication technology, distinguished by its low power consumption and long communication range, is widely used in the Internet of Things. Nevertheless, the LoRa MAC layer adopts pure ALOHA for medium access control, which may suffer from severe packet collisions as the network scale expands, consequently reducing the system energy efficiency (EE). To address this issue, it is&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.13076v1-abstract-full').style.display = 'inline'; document.getElementById('2407.13076v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.13076v1-abstract-full" style="display: none;"> Long-range (LoRa) communication technology, distinguished by its low power consumption and long communication range, is widely used in the Internet of Things. Nevertheless, the LoRa MAC layer adopts pure ALOHA for medium access control, which may suffer from severe packet collisions as the network scale expands, consequently reducing the system energy efficiency (EE). 
To address this issue, it is critical to carefully allocate transmission parameters such as the channel (CH), transmission power (TP) and spreading factor (SF) to each end device (ED). Owing to the low duty cycle and sporadic traffic of LoRa networks, evaluating the system EE under various parameter settings proves to be time-consuming. Consequently, we propose an analytical model aimed at calculating the system EE while fully considering the impact of multiple gateways, duty cycling, quasi-orthogonal SFs and capture effects. On this basis, we investigate a joint CH, SF and TP allocation problem, with the objective of optimizing the system EE for uplink transmissions. Due to the NP-hard complexity of the problem, the optimization problem is decomposed into two subproblems: CH assignment and SF/TP assignment. First, a matching-based algorithm is introduced to address the CH assignment subproblem. Then, an attention-based multiagent reinforcement learning technique is employed to address the SF/TP assignment subproblem for EDs allocated to the same CH, which reduces the number of learning agents to achieve fast convergence. The simulation outcomes indicate that the proposed approach converges quickly under various parameter settings and obtains significantly better system EE than baseline algorithms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.13076v1-abstract-full').style.display = 'none'; document.getElementById('2407.13076v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.03804">arXiv:2407.03804</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.03804">pdf</a>, <a href="https://arxiv.org/format/2407.03804">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Multi-Time Scale Service Caching and Pricing in MEC Systems with Dynamic Program Popularity </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Y">Yiming Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+X">Xingyuan Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bo Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Gong%2C+S">Shimin Gong</a>, <a href="/search/cs?searchtype=author&amp;query=Su%2C+Z">Zhou Su</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.03804v1-abstract-short" style="display: inline;"> In mobile edge computing systems, base stations (BSs) equipped with edge servers can provide computing services to users to reduce their task execution time. However, there is always a conflict of interest between the BS and users. 
The BS prices the service programs based on user demand to maximize its own profit, while the users determine their offloading strategies based on the prices to minimiz&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.03804v1-abstract-full').style.display = 'inline'; document.getElementById('2407.03804v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.03804v1-abstract-full" style="display: none;"> In mobile edge computing systems, base stations (BSs) equipped with edge servers can provide computing services to users to reduce their task execution time. However, there is always a conflict of interest between the BS and users. The BS prices the service programs based on user demand to maximize its own profit, while the users determine their offloading strategies based on the prices to minimize their costs. Moreover, service programs need to be pre-cached to meet immediate computing needs. Due to the limited caching capacity and variations in service program popularity, the BS must dynamically select which service programs to cache. Since service caching and pricing have different needs for adjustment time granularities, we propose a two-time scale framework to jointly optimize service caching, pricing and task offloading. For the large time scale, we propose a game-nested deep reinforcement learning algorithm to dynamically adjust service caching according to the estimated popularity information. For the small time scale, by modeling the interaction between the BS and users as a two-stage game, we prove the existence of the equilibrium under incomplete information and then derive the optimal pricing and offloading strategies. Extensive simulations based on a real-world dataset demonstrate the efficiency of the proposed approach. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.03804v1-abstract-full').style.display = 'none'; document.getElementById('2407.03804v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.17386">arXiv:2406.17386</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.17386">pdf</a>, <a href="https://arxiv.org/format/2406.17386">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Double Momentum Method for Lower-Level Constrained Bilevel Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Shi%2C+W">Wanli Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Chang%2C+Y">Yi Chang</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bin Gu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.17386v1-abstract-short" style="display: inline;"> Bilevel optimization (BO) has recently gained prominence in many machine learning applications due to its ability to capture the nested structure inherent in these problems. 
Recently, many hypergradient methods have been proposed as effective solutions for solving large-scale problems. However, current hypergradient methods for the lower-level constrained bilevel optimization (LCBO) problems need&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.17386v1-abstract-full').style.display = 'inline'; document.getElementById('2406.17386v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.17386v1-abstract-full" style="display: none;"> Bilevel optimization (BO) has recently gained prominence in many machine learning applications due to its ability to capture the nested structure inherent in these problems. Recently, many hypergradient methods have been proposed as effective solutions for solving large-scale problems. However, current hypergradient methods for the lower-level constrained bilevel optimization (LCBO) problems need very restrictive assumptions, namely, where optimality conditions satisfy the differentiability and invertibility conditions and lack a solid analysis of the convergence rate. What&#39;s worse, existing methods require either double-loop updates, which are sometimes less efficient. To solve this problem, in this paper, we propose a new hypergradient of LCBO leveraging the theory of nonsmooth implicit function theorem instead of using the restrictive assumptions. In addition, we propose a \textit{single-loop single-timescale} algorithm based on the double-momentum method and adaptive step size method and prove it can return a $(\delta, \epsilon)$-stationary point with $\tilde{\mathcal{O}}(d_2^2\epsilon^{-4})$ iterations. Experiments on two applications demonstrate the effectiveness of our proposed method. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.17386v1-abstract-full').style.display = 'none'; document.getElementById('2406.17386v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">27 pages, 9 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Proceedings of the 41st International Conference on Machine Learning, PMLR 235:44838-44864, 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.18725">arXiv:2405.18725</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.18725">pdf</a>, <a href="https://arxiv.org/format/2405.18725">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> </div> </div> <p class="title is-5 mathjax"> Can We Enhance the Quality of Mobile Crowdsensing Data Without Ground Truth? 
</p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jiajie Li</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bo Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Gong%2C+S">Shimin Gong</a>, <a href="/search/cs?searchtype=author&amp;query=Su%2C+Z">Zhou Su</a>, <a href="/search/cs?searchtype=author&amp;query=Guizani%2C+M">Mohsen Guizani</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.18725v1-abstract-short" style="display: inline;"> Mobile crowdsensing (MCS) has emerged as a prominent trend across various domains. However, ensuring the quality of the sensing data submitted by mobile users (MUs) remains a complex and challenging problem. To address this challenge, an advanced method is required to detect low-quality sensing data and identify malicious MUs that may disrupt the normal operations of an MCS system. Therefore, this&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.18725v1-abstract-full').style.display = 'inline'; document.getElementById('2405.18725v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.18725v1-abstract-full" style="display: none;"> Mobile crowdsensing (MCS) has emerged as a prominent trend across various domains. However, ensuring the quality of the sensing data submitted by mobile users (MUs) remains a complex and challenging problem. To address this challenge, an advanced method is required to detect low-quality sensing data and identify malicious MUs that may disrupt the normal operations of an MCS system. Therefore, this article proposes a prediction- and reputation-based truth discovery (PRBTD) framework, which can separate low-quality data from high-quality data in sensing tasks. 
First, we apply a correlation-focused spatial-temporal transformer network to predict the ground truth of the input sensing data. Then, we extract the sensing errors of the data as features based on the prediction results to calculate the implications among the data. Finally, we design a reputation-based truth discovery (TD) module for identifying low-quality data with their implications. Given sensing data submitted by MUs, PRBTD can eliminate the data with heavy noise and identify malicious MUs with high accuracy. Extensive experimental results demonstrate that PRBTD outperforms the existing methods in terms of identification accuracy and data quality enhancement. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.18725v1-abstract-full').style.display = 'none'; document.getElementById('2405.18725v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.01615">arXiv:2405.01615</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.01615">pdf</a>, <a href="https://arxiv.org/format/2405.01615">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Hard-Thresholding Meets Evolution Strategies in Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gao%2C+C">Chengqian Gao</a>, <a href="/search/cs?searchtype=author&amp;query=de+Vazelhes%2C+W">William de Vazelhes</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+H">Hualin Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bin Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+Z">Zhiqiang Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.01615v1-abstract-short" style="display: inline;"> Evolution Strategies (ES) have emerged as a competitive alternative for model-free reinforcement learning, showcasing exemplary performance in tasks like Mujoco and Atari. Notably, they shine in scenarios with imperfect reward functions, making them invaluable for real-world applications where dense reward signals may be elusive. 
Yet, an inherent assumption in ES, that all input features are task-&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.01615v1-abstract-full').style.display = 'inline'; document.getElementById('2405.01615v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.01615v1-abstract-full" style="display: none;"> Evolution Strategies (ES) have emerged as a competitive alternative for model-free reinforcement learning, showcasing exemplary performance in tasks like Mujoco and Atari. Notably, they shine in scenarios with imperfect reward functions, making them invaluable for real-world applications where dense reward signals may be elusive. Yet, an inherent assumption in ES, that all input features are task-relevant, poses challenges, especially when confronted with irrelevant features common in real-world problems. This work scrutinizes this limitation, particularly focusing on the Natural Evolution Strategies (NES) variant. We propose NESHT, a novel approach that integrates Hard-Thresholding (HT) with NES to champion sparsity, ensuring only pertinent features are employed. Backed by rigorous analysis and empirical tests, NESHT demonstrates its promise in mitigating the pitfalls of irrelevant features and shines in complex decision-making problems like noisy Mujoco and Atari tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.01615v1-abstract-full').style.display = 'none'; document.getElementById('2405.01615v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, including proofs in the appendix</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.19449">arXiv:2404.19449</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.19449">pdf</a>, <a href="https://arxiv.org/format/2404.19449">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> AoI-aware Sensing Scheduling and Trajectory Optimization for Multi-UAV-assisted Wireless Backscatter Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Long%2C+Y">Yusi Long</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+S">Songhan Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Gong%2C+S">Shimin Gong</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bo Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/cs?searchtype=author&amp;query=Xuemin"> Xuemin</a>, <a href="/search/cs?searchtype=author&amp;query=Shen"> Shen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.19449v1-abstract-short" style="display: inline;"> This paper considers multiple unmanned aerial vehicles (UAVs) to assist sensing data transmissions from the ground users (GUs) to a remote base station (BS). Each UAV collects sensing data from the GUs and then forwards the sensing data to the remote BS. 
The GUs first backscatter their data to the UAVs and then all UAVs forward data to the BS by the nonorthogonal multiple access (NOMA) transmissio&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.19449v1-abstract-full').style.display = 'inline'; document.getElementById('2404.19449v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.19449v1-abstract-full" style="display: none;"> This paper considers multiple unmanned aerial vehicles (UAVs) to assist sensing data transmissions from the ground users (GUs) to a remote base station (BS). Each UAV collects sensing data from the GUs and then forwards the sensing data to the remote BS. The GUs first backscatter their data to the UAVs and then all UAVs forward data to the BS by the nonorthogonal multiple access (NOMA) transmissions. We formulate a multi-stage stochastic optimization problem to minimize the long-term time-averaged age-of-information (AoI) by jointly optimizing the GUs&#39; access control, the UAVs&#39; beamforming, and trajectory planning strategies. To solve this problem, we first model the dynamics of the GUs&#39; AoI statuses by virtual queueing systems, and then propose the AoI-aware sensing scheduling and trajectory optimization (AoI-STO) algorithm. This allows us to transform the multi-stage AoI minimization problem into a series of per-slot control problems by using the Lyapunov optimization framework. In each time slot, the GUs&#39; access control, the UAVs&#39; beamforming, and mobility control strategies are updated by using the block coordinate descent (BCD) method according to the instant GUs&#39; AoI statuses. Simulation results reveal that the proposed AoI-STO algorithm can reduce the overall AoI by more than 50%. The GUs&#39; scheduling fairness is also improved greatly by adapting the GUs&#39; access control compared with typical baseline schemes. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.19449v1-abstract-full').style.display = 'none'; document.getElementById('2404.19449v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper has been accepted by IEEE TVT</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.08885">arXiv:2404.08885</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.08885">pdf</a>, <a href="https://arxiv.org/format/2404.08885">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Programming Languages">cs.PL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Is Next Token Prediction Sufficient for GPT? 
Exploration on Code Logic Comprehension </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Qi%2C+M">Mengnan Qi</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+Y">Yufan Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Yao%2C+Y">Yongqiang Yao</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+M">Maoquan Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bin Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Sundaresan%2C+N">Neel Sundaresan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.08885v1-abstract-short" style="display: inline;"> Large language models (LLMs) has experienced exponential growth, they demonstrate remarkable performance across various tasks. Notwithstanding, contemporary research primarily centers on enhancing the size and quality of pretraining data, still utilizing the next token prediction task on autoregressive transformer model structure. The efficacy of this task in truly facilitating the model&#39;s compreh&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.08885v1-abstract-full').style.display = 'inline'; document.getElementById('2404.08885v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.08885v1-abstract-full" style="display: none;"> Large language models (LLMs) has experienced exponential growth, they demonstrate remarkable performance across various tasks. Notwithstanding, contemporary research primarily centers on enhancing the size and quality of pretraining data, still utilizing the next token prediction task on autoregressive transformer model structure. 
The efficacy of this task in truly facilitating the model&#39;s comprehension of code logic remains questionable, we speculate that it still interprets code as mere text, while human emphasizes the underlying logical knowledge. In order to prove it, we introduce a new task, &#34;Logically Equivalent Code Selection,&#34; which necessitates the selection of logically equivalent code from a candidate set, given a query code. Our experimental findings indicate that current LLMs underperform in this task, since they understand code by unordered bag of keywords. To ameliorate their performance, we propose an advanced pretraining task, &#34;Next Token Prediction+&#34;. This task aims to modify the sentence embedding distribution of the LLM without sacrificing its generative capabilities. Our experimental results reveal that following this pretraining, both Code Llama and StarCoder, the prevalent code domain pretraining models, display significant improvements on our logically equivalent code selection task and the code completion task. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.08885v1-abstract-full').style.display = 'none'; document.getElementById('2404.08885v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.01897">arXiv:2404.01897</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.01897">pdf</a>, <a href="https://arxiv.org/format/2404.01897">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Continuous Spiking Graph Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yin%2C+N">Nan Yin</a>, <a href="/search/cs?searchtype=author&amp;query=Wan%2C+M">Mengzhu Wan</a>, <a href="/search/cs?searchtype=author&amp;query=Shen%2C+L">Li Shen</a>, <a href="/search/cs?searchtype=author&amp;query=Patel%2C+H+L">Hitesh Laxmichand Patel</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+B">Baopu Li</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bin Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Xiong%2C+H">Huan Xiong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.01897v1-abstract-short" style="display: inline;"> Continuous graph neural networks (CGNNs) have garnered significant attention due to their ability to generalize existing discrete graph neural networks (GNNs) by introducing continuous dynamics. They typically draw inspiration from diffusion-based methods to introduce a novel propagation scheme, which is analyzed using ordinary differential equations (ODE). 
However, the implementation of CGNNs req&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.01897v1-abstract-full').style.display = 'inline'; document.getElementById('2404.01897v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.01897v1-abstract-full" style="display: none;"> Continuous graph neural networks (CGNNs) have garnered significant attention due to their ability to generalize existing discrete graph neural networks (GNNs) by introducing continuous dynamics. They typically draw inspiration from diffusion-based methods to introduce a novel propagation scheme, which is analyzed using ordinary differential equations (ODE). However, the implementation of CGNNs requires significant computational power, making them challenging to deploy on battery-powered devices. Inspired by recent spiking neural networks (SNNs), which emulate a biological inference process and provide an energy-efficient neural architecture, we incorporate the SNNs with CGNNs in a unified framework, named Continuous Spiking Graph Neural Networks (COS-GNN). We employ SNNs for graph node representation at each time step, which are further integrated into the ODE process along with time. To enhance information preservation and mitigate information loss in SNNs, we introduce the high-order structure of COS-GNN, which utilizes the second-order ODE for spiking representation and continuous propagation. Moreover, we provide the theoretical proof that COS-GNN effectively mitigates the issues of exploding and vanishing gradients, enabling us to capture long-range dependencies between nodes. Experimental results on graph-based learning tasks demonstrate the effectiveness of the proposed COS-GNN over competitive baselines. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.01897v1-abstract-full').style.display = 'none'; document.getElementById('2404.01897v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.18388">arXiv:2403.18388</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.18388">pdf</a>, <a href="https://arxiv.org/format/2403.18388">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> FTBC: Forward Temporal Bias Correction for Optimizing ANN-SNN Conversion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wu%2C+X">Xiaofeng Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Bojkovic%2C+V">Velibor Bojkovic</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bin Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Suo%2C+K">Kun Suo</a>, <a href="/search/cs?searchtype=author&amp;query=Zou%2C+K">Kai Zou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.18388v1-abstract-short" style="display: inline;"> Spiking Neural Networks (SNNs) offer a promising avenue for energy-efficient computing compared with Artificial Neural Networks (ANNs), closely mirroring biological neural processes. 
However, this potential comes with inherent challenges in directly training SNNs through spatio-temporal backpropagation -- stemming from the temporal dynamics of spiking neurons and their discrete signal processing -&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.18388v1-abstract-full').style.display = 'inline'; document.getElementById('2403.18388v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.18388v1-abstract-full" style="display: none;"> Spiking Neural Networks (SNNs) offer a promising avenue for energy-efficient computing compared with Artificial Neural Networks (ANNs), closely mirroring biological neural processes. However, this potential comes with inherent challenges in directly training SNNs through spatio-temporal backpropagation -- stemming from the temporal dynamics of spiking neurons and their discrete signal processing -- which necessitates alternative ways of training, most notably through ANN-SNN conversion. In this work, we introduce a lightweight Forward Temporal Bias Correction (FTBC) technique, aimed at enhancing conversion accuracy without the computational overhead. We ground our method on provided theoretical findings that through proper temporal bias calibration the expected error of ANN-SNN conversion can be reduced to be zero after each time step. We further propose a heuristic algorithm for finding the temporal bias only in the forward pass, thus eliminating the computational burden of backpropagation and we evaluate our method on CIFAR-10/100 and ImageNet datasets, achieving a notable increase in accuracy on all datasets. Codes are released at a GitHub repository. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.18388v1-abstract-full').style.display = 'none'; document.getElementById('2403.18388v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.15938">arXiv:2402.15938</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.15938">pdf</a>, <a href="https://arxiv.org/format/2402.15938">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Generalization or Memorization: Data Contamination and Trustworthy Evaluation for Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Dong%2C+Y">Yihong Dong</a>, <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+X">Xue Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+H">Huanyu Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Jin%2C+Z">Zhi Jin</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bin Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+M">Mengfei 
Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+G">Ge Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.15938v3-abstract-short" style="display: inline;"> Recent statements about the impressive capabilities of large language models (LLMs) are usually supported by evaluating on open-access benchmarks. Considering the vast size and wide-ranging sources of LLMs&#39; training data, it could explicitly or implicitly include test data, leading to LLMs being more susceptible to data contamination. However, due to the opacity of training data, the black-box acc&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.15938v3-abstract-full').style.display = 'inline'; document.getElementById('2402.15938v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.15938v3-abstract-full" style="display: none;"> Recent statements about the impressive capabilities of large language models (LLMs) are usually supported by evaluating on open-access benchmarks. Considering the vast size and wide-ranging sources of LLMs&#39; training data, it could explicitly or implicitly include test data, leading to LLMs being more susceptible to data contamination. However, due to the opacity of training data, the black-box access of models, and the rapid growth of synthetic training data, detecting and mitigating data contamination for LLMs faces significant challenges. In this paper, we propose CDD, which stands for Contamination Detection via output Distribution for LLMs. CDD necessitates only the sampled texts to detect data contamination, by identifying the peakedness of LLM&#39;s output distribution. 
To mitigate the impact of data contamination in evaluation, we also present TED: Trustworthy Evaluation via output Distribution, based on the correction of LLM&#39;s output distribution. To facilitate this study, we introduce two benchmarks, i.e., DetCon and ComiEval, for data contamination detection and contamination mitigation evaluation tasks. Extensive experimental results show that CDD achieves the average relative improvements of 21.8\%-30.2\% over other contamination detection approaches in terms of Accuracy, F1 Score, and AUC metrics, and can effectively detect implicit contamination. TED substantially mitigates performance improvements up to 66.9\% attributed to data contamination across various contamination setups. In real-world applications, we reveal that ChatGPT exhibits a high potential to suffer from data contamination on HumanEval benchmark. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.15938v3-abstract-full').style.display = 'none'; document.getElementById('2402.15938v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ACL</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.13241">arXiv:2402.13241</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.13241">pdf</a>, <a href="https://arxiv.org/format/2402.13241">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Federated Causal Discovery from Heterogeneous Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+L">Loka Li</a>, <a href="/search/cs?searchtype=author&amp;query=Ng%2C+I">Ignavier Ng</a>, <a href="/search/cs?searchtype=author&amp;query=Luo%2C+G">Gongxu Luo</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+B">Biwei Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+G">Guangyi Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+T">Tongliang Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bin Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+K">Kun Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.13241v2-abstract-short" style="display: inline;"> Conventional causal discovery methods rely on centralized data, which is inconsistent with the decentralized nature of data in many real-world situations. This discrepancy has motivated the development of federated causal discovery (FCD) approaches. 
However, existing FCD methods may be limited by their potentially restrictive assumptions of identifiable functional causal models or homogeneous data&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.13241v2-abstract-full').style.display = 'inline'; document.getElementById('2402.13241v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.13241v2-abstract-full" style="display: none;"> Conventional causal discovery methods rely on centralized data, which is inconsistent with the decentralized nature of data in many real-world situations. This discrepancy has motivated the development of federated causal discovery (FCD) approaches. However, existing FCD methods may be limited by their potentially restrictive assumptions of identifiable functional causal models or homogeneous data distributions, narrowing their applicability in diverse scenarios. In this paper, we propose a novel FCD method attempting to accommodate arbitrary causal models and heterogeneous data. We first utilize a surrogate variable corresponding to the client index to account for the data heterogeneity across different clients. We then develop a federated conditional independence test (FCIT) for causal skeleton discovery and establish a federated independent change principle (FICP) to determine causal directions. These approaches involve constructing summary statistics as a proxy of the raw data to protect data privacy. Owing to the nonparametric properties, FCIT and FICP make no assumption about particular functional forms, thereby facilitating the handling of arbitrary causal models. We conduct extensive experiments on synthetic and real datasets to show the efficacy of our method. The code is available at https://github.com/lokali/FedCDH.git. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.13241v2-abstract-full').style.display = 'none'; document.getElementById('2402.13241v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICLR 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.01146">arXiv:2402.01146</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.01146">pdf</a>, <a href="https://arxiv.org/format/2402.01146">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Limited Memory Online Gradient Descent for Kernelized Pairwise Learning with Dynamic Averaging </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=AlQuabeh%2C+H">Hilal AlQuabeh</a>, <a href="/search/cs?searchtype=author&amp;query=de+Vazelhes%2C+W">William de Vazelhes</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bin Gu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.01146v1-abstract-short" style="display: inline;"> Pairwise learning, an important domain within machine learning, addresses loss functions defined on pairs of training examples, including those in 
metric learning and AUC maximization. Acknowledging the quadratic growth in computation complexity accompanying pairwise loss as the sample size grows, researchers have turned to online gradient descent (OGD) methods for enhanced scalability. Recently,&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.01146v1-abstract-full').style.display = 'inline'; document.getElementById('2402.01146v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.01146v1-abstract-full" style="display: none;"> Pairwise learning, an important domain within machine learning, addresses loss functions defined on pairs of training examples, including those in metric learning and AUC maximization. Acknowledging the quadratic growth in computation complexity accompanying pairwise loss as the sample size grows, researchers have turned to online gradient descent (OGD) methods for enhanced scalability. Recently, an OGD algorithm emerged, employing gradient computation involving prior and most recent examples, a step that effectively reduces algorithmic complexity to $O(T)$, with $T$ being the number of received examples. This approach, however, confines itself to linear models while assuming the independence of example arrivals. We introduce a lightweight OGD algorithm that does not require the independence of examples and generalizes to kernel pairwise learning. Our algorithm builds the gradient based on a random example and a moving average representing the past data, which results in a sub-linear regret bound with a complexity of $O(T)$. Furthermore, through the integration of $O(\sqrt{T}{\log{T}})$ random Fourier features, the complexity of kernel calculations is effectively minimized. Several experiments with real-world datasets show that the proposed technique outperforms kernel and linear algorithms in offline and online scenarios. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.01146v1-abstract-full').style.display = 'none'; document.getElementById('2402.01146v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted in AAAI 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.12983">arXiv:2401.12983</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2401.12983">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Physics Education">physics.ed-ph</span> </div> </div> <p class="title is-5 mathjax"> Assessing Large Language Models in Mechanical Engineering Education: A Study on Mechanics-Focused Conceptual Understanding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Tian%2C+J">Jie Tian</a>, <a href="/search/cs?searchtype=author&amp;query=Hou%2C+J">Jixin Hou</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Z">Zihao Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Shu%2C+P">Peng Shu</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Z">Zhengliang Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Xiang%2C+Y">Yujie Xiang</a>, <a 
href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Beikang Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Filla%2C+N">Nicholas Filla</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Y">Yiwei Li</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+N">Ning Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+X">Xianyan Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Tang%2C+K">Keke Tang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+T">Tianming Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+X">Xianqiao Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.12983v1-abstract-short" style="display: inline;"> This study is a pioneering endeavor to investigate the capabilities of Large Language Models (LLMs) in addressing conceptual questions within the domain of mechanical engineering with a focus on mechanics. Our examination involves a manually crafted exam encompassing 126 multiple-choice questions, spanning various aspects of mechanics courses, including Fluid Mechanics, Mechanical Vibration, Engin&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.12983v1-abstract-full').style.display = 'inline'; document.getElementById('2401.12983v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.12983v1-abstract-full" style="display: none;"> This study is a pioneering endeavor to investigate the capabilities of Large Language Models (LLMs) in addressing conceptual questions within the domain of mechanical engineering with a focus on mechanics. 
Our examination involves a manually crafted exam encompassing 126 multiple-choice questions, spanning various aspects of mechanics courses, including Fluid Mechanics, Mechanical Vibration, Engineering Statics and Dynamics, Mechanics of Materials, Theory of Elasticity, and Continuum Mechanics. Three LLMs, including ChatGPT (GPT-3.5), ChatGPT (GPT-4), and Claude (Claude-2.1), were subjected to evaluation against engineering faculties and students with or without mechanical engineering background. The findings reveal GPT-4&#39;s superior performance over the other two LLMs and human cohorts in answering questions across various mechanics topics, except for Continuum Mechanics. This signals the potential future improvements for GPT models in handling symbolic calculations and tensor analyses. The performances of LLMs were all significantly improved with explanations prompted prior to direct responses, underscoring the crucial role of prompt engineering. Interestingly, GPT-3.5 demonstrates improved performance with prompts covering a broader domain, while GPT-4 excels with prompts focusing on specific subjects. Finally, GPT-4 exhibits notable advancements in mitigating input bias, as evidenced by guessing preferences for humans. This study unveils the substantial potential of LLMs as highly knowledgeable assistants in both mechanical pedagogy and scientific research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.12983v1-abstract-full').style.display = 'none'; document.getElementById('2401.12983v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">30 pages, 7 figures, and 1 table</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.06401">arXiv:2401.06401</a> <span>&nbsp;&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> DevEval: Evaluating Code Generation in Practical Software Projects </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jia Li</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+G">Ge Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+Y">Yunfei Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Y">Yongmin Li</a>, <a href="/search/cs?searchtype=author&amp;query=Jin%2C+Z">Zhi Jin</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+H">Hao Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+H">Huanyu Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+K">Kaibo Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+L">Lecheng Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Fang%2C+Z">Zheng Fang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+L">Lanshen Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Ding%2C+J">Jiazheng Ding</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+X">Xuanming Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Dong%2C+Y">Yihong Dong</a>, <a 
href="/search/cs?searchtype=author&amp;query=Zhu%2C+Y">Yuqi Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bin Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+M">Mengfei Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.06401v4-abstract-short" style="display: inline;"> How to evaluate Large Language Models (LLMs) in code generation is an open question. Many benchmarks have been proposed but are inconsistent with practical software projects, e.g., unreal program distributions, insufficient dependencies, and small-scale project contexts. Thus, the capabilities of LLMs in practical projects are still unclear. In this paper, we propose a new benchmark named DevEval,&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.06401v4-abstract-full').style.display = 'inline'; document.getElementById('2401.06401v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.06401v4-abstract-full" style="display: none;"> How to evaluate Large Language Models (LLMs) in code generation is an open question. Many benchmarks have been proposed but are inconsistent with practical software projects, e.g., unreal program distributions, insufficient dependencies, and small-scale project contexts. Thus, the capabilities of LLMs in practical projects are still unclear. In this paper, we propose a new benchmark named DevEval, aligned with Developers&#39; experiences in practical projects. DevEval is collected through a rigorous pipeline, containing 2,690 samples from 119 practical projects and covering 10 domains. Compared to previous benchmarks, DevEval aligns to practical projects in multiple dimensions, e.g., real program distributions, sufficient dependencies, and enough-scale project contexts. 
We assess five popular LLMs on DevEval (e.g., gpt-4, gpt-3.5-turbo, CodeLLaMa, and StarCoder) and reveal their actual abilities in code generation. For instance, the highest Pass@1 of gpt-3.5-turbo only is 42 in our experiments. We also discuss the challenges and future directions of code generation in practical projects. We open-source DevEval and hope it can facilitate the development of code generation in practical projects. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.06401v4-abstract-full').style.display = 'none'; document.getElementById('2401.06401v4-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">We are re-checking this benchmark and repeating related experiments. 
New versions of DevEval will be released later</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.05394">arXiv:2401.05394</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2401.05394">pdf</a>, <a href="https://arxiv.org/format/2401.05394">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Iterative Regularization with k-support Norm: An Important Complement to Sparse Recovery </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=de+Vazelhes%2C+W">William de Vazelhes</a>, <a href="/search/cs?searchtype=author&amp;query=Mukhoty%2C+B">Bhaskar Mukhoty</a>, <a href="/search/cs?searchtype=author&amp;query=Yuan%2C+X">Xiao-Tong Yuan</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bin Gu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.05394v4-abstract-short" style="display: inline;"> Sparse recovery is ubiquitous in machine learning and signal processing. Due to the NP-hard nature of sparse recovery, existing methods are known to suffer either from restrictive (or even unknown) applicability conditions, or high computational cost. 
Recently, iterative regularization methods have emerged as a promising fast approach because they can achieve sparse recovery in one pass through ea&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.05394v4-abstract-full').style.display = 'inline'; document.getElementById('2401.05394v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.05394v4-abstract-full" style="display: none;"> Sparse recovery is ubiquitous in machine learning and signal processing. Due to the NP-hard nature of sparse recovery, existing methods are known to suffer either from restrictive (or even unknown) applicability conditions, or high computational cost. Recently, iterative regularization methods have emerged as a promising fast approach because they can achieve sparse recovery in one pass through early stopping, rather than the tedious grid-search used in the traditional methods. However, most of those iterative methods are based on the $\ell_1$ norm which requires restrictive applicability conditions and could fail in many cases. Therefore, achieving sparse recovery with iterative regularization methods under a wider range of conditions has yet to be further explored. To address this issue, we propose a novel iterative regularization algorithm, IRKSN, based on the $k$-support norm regularizer rather than the $\ell_1$ norm. We provide conditions for sparse recovery with IRKSN, and compare them with traditional conditions for recovery with $\ell_1$ norm regularizers. Additionally, we give an early stopping bound on the model error of IRKSN with explicit constants, achieving the standard linear rate for sparse recovery. Finally, we illustrate the applicability of our algorithm on several experiments, including a support recovery experiment with a correlated design matrix. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.05394v4-abstract-full').style.display = 'none'; document.getElementById('2401.05394v4-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at AAAI 2024. Code at https://github.com/wdevazelhes/IRKSN_AAAI2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.05373">arXiv:2401.05373</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2401.05373">pdf</a>, <a href="https://arxiv.org/format/2401.05373">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Dynamic Spiking Framework for Graph Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yin%2C+N">Nan Yin</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+M">Mengzhu Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Z">Zhenghan Chen</a>, <a href="/search/cs?searchtype=author&amp;query=De+Masi%2C+G">Giulia De Masi</a>, <a 
href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bin Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Xiong%2C+H">Huan Xiong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.05373v3-abstract-short" style="display: inline;"> The integration of Spiking Neural Networks (SNNs) and Graph Neural Networks (GNNs) is gradually attracting attention due to the low power consumption and high efficiency in processing the non-Euclidean data represented by graphs. However, as a common problem, dynamic graph representation learning faces challenges such as high complexity and large memory overheads. Current work often uses SNNs inst&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.05373v3-abstract-full').style.display = 'inline'; document.getElementById('2401.05373v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.05373v3-abstract-full" style="display: none;"> The integration of Spiking Neural Networks (SNNs) and Graph Neural Networks (GNNs) is gradually attracting attention due to the low power consumption and high efficiency in processing the non-Euclidean data represented by graphs. However, as a common problem, dynamic graph representation learning faces challenges such as high complexity and large memory overheads. Current work often uses SNNs instead of Recurrent Neural Networks (RNNs) by using binary features instead of continuous ones for efficient training, which would overlooks graph structure information and leads to the loss of details during propagation. Additionally, optimizing dynamic spiking models typically requires propagation of information across time steps, which increases memory requirements. 
To address these challenges, we present a framework named \underline{Dy}namic \underline{S}p\underline{i}king \underline{G}raph \underline{N}eural Networks (\method{}). To mitigate the information loss problem, \method{} propagates early-layer information directly to the last layer for information compensation. To accommodate the memory requirements, we apply the implicit differentiation on the equilibrium state, which does not rely on the exact reverse of the forward computation. While traditional implicit differentiation methods are usually used for static situations, \method{} extends it to the dynamic graph setting. Extensive experiments on three large-scale real-world dynamic graph datasets validate the effectiveness of \method{} on dynamic node classification tasks with lower computational costs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.05373v3-abstract-full').style.display = 'none'; document.getElementById('2401.05373v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.11508">arXiv:2312.11508</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2312.11508">pdf</a>, <a href="https://arxiv.org/format/2312.11508">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Rethinking the Instruction Quality: LIFT is What You Need </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xu%2C+Y">Yang Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Yao%2C+Y">Yongqiang Yao</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+Y">Yufan Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Qi%2C+M">Mengnan Qi</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+M">Maoquan Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bin Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Sundaresan%2C+N">Neel Sundaresan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.11508v2-abstract-short" style="display: inline;"> Instruction tuning, a specialized technique to enhance large language model (LLM) performance via instruction datasets, relies heavily on the quality of employed data. Existing quality improvement methods alter instruction data through dataset expansion or curation. 
However, the expansion method risks data redundancy, potentially compromising LLM performance, while the curation approach confines t&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.11508v2-abstract-full').style.display = 'inline'; document.getElementById('2312.11508v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.11508v2-abstract-full" style="display: none;"> Instruction tuning, a specialized technique to enhance large language model (LLM) performance via instruction datasets, relies heavily on the quality of employed data. Existing quality improvement methods alter instruction data through dataset expansion or curation. However, the expansion method risks data redundancy, potentially compromising LLM performance, while the curation approach confines the LLM&#39;s potential to the original dataset. Our aim is to surpass the original data quality without encountering these shortcomings. To achieve this, we propose LIFT (LLM Instruction Fusion Transfer), a novel and versatile paradigm designed to elevate the instruction quality to new heights. LIFT strategically broadens data distribution to encompass more high-quality subspaces and eliminates redundancy, concentrating on high-quality segments across overall data subspaces. Experimental results demonstrate that, even with a limited quantity of high-quality instruction data selected by our paradigm, LLMs not only consistently uphold robust performance across various tasks but also surpass some state-of-the-art results, highlighting the significant improvement in instruction quality achieved by our paradigm. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.11508v2-abstract-full').style.display = 'none'; document.getElementById('2312.11508v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.15368">arXiv:2311.15368</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2311.15368">pdf</a>, <a href="https://arxiv.org/format/2311.15368">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Flow-Guided Diffusion for Video Inpainting </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bohai Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+Y">Yongsheng Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Fan%2C+H">Heng Fan</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+L">Libo Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.15368v1-abstract-short" style="display: inline;"> Video inpainting has been challenged by complex scenarios like large movements and low-light conditions. Current methods, including emerging diffusion models, face limitations in quality and efficiency. 
This paper introduces the Flow-Guided Diffusion model for Video Inpainting (FGDVI), a novel approach that significantly enhances temporal consistency and inpainting quality via reusing an off-the-s&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.15368v1-abstract-full').style.display = 'inline'; document.getElementById('2311.15368v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.15368v1-abstract-full" style="display: none;"> Video inpainting has been challenged by complex scenarios like large movements and low-light conditions. Current methods, including emerging diffusion models, face limitations in quality and efficiency. This paper introduces the Flow-Guided Diffusion model for Video Inpainting (FGDVI), a novel approach that significantly enhances temporal consistency and inpainting quality via reusing an off-the-shelf image generation diffusion model. We employ optical flow for precise one-step latent propagation and introduces a model-agnostic flow-guided latent interpolation technique. This technique expedites denoising, seamlessly integrating with any Video Diffusion Model (VDM) without additional training. Our FGDVI demonstrates a remarkable 10% improvement in flow warping error E_warp over existing state-of-the-art methods. Our comprehensive experiments validate superior performance of FGDVI, offering a promising direction for advanced video inpainting. The code and detailed results will be publicly available in https://github.com/NevSNev/FGDVI. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.15368v1-abstract-full').style.display = 'none'; document.getElementById('2311.15368v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.06816">arXiv:2311.06816</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2311.06816">pdf</a>, <a href="https://arxiv.org/format/2311.06816">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> On original and latent space connectivity in deep neural networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Boyang Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Borovykh%2C+A">Anastasia Borovykh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.06816v1-abstract-short" style="display: inline;"> We study whether inputs from the same class can be connected by a continuous path, in original or latent representation space, such that all points on the path are mapped by the neural network model to the same class. Understanding how the neural network views its own input space and how the latent spaces are structured has value for explainability and robustness. 
We show that paths, linear or non&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.06816v1-abstract-full').style.display = 'inline'; document.getElementById('2311.06816v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.06816v1-abstract-full" style="display: none;"> We study whether inputs from the same class can be connected by a continuous path, in original or latent representation space, such that all points on the path are mapped by the neural network model to the same class. Understanding how the neural network views its own input space and how the latent spaces are structured has value for explainability and robustness. We show that paths, linear or nonlinear, connecting same-class inputs exist in all cases studied. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.06816v1-abstract-full').style.display = 'none'; document.getElementById('2311.06816v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.05112">arXiv:2311.05112</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2311.05112">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> A Survey of Large Language Models in Medicine: Progress, Application, and Challenge </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+H">Hongjian Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+F">Fenglin Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Boyang Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Zou%2C+X">Xinyu Zou</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+J">Jinfa Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+J">Jinge Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Y">Yiru Li</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+S+S">Sam S. 
Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+P">Peilin Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+J">Junling Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Hua%2C+Y">Yining Hua</a>, <a href="/search/cs?searchtype=author&amp;query=Mao%2C+C">Chengfeng Mao</a>, <a href="/search/cs?searchtype=author&amp;query=You%2C+C">Chenyu You</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+X">Xian Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+Y">Yefeng Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Clifton%2C+L">Lei Clifton</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Z">Zheng Li</a>, <a href="/search/cs?searchtype=author&amp;query=Luo%2C+J">Jiebo Luo</a>, <a href="/search/cs?searchtype=author&amp;query=Clifton%2C+D+A">David A. Clifton</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.05112v7-abstract-short" style="display: inline;"> Large language models (LLMs), such as ChatGPT, have received substantial attention due to their capabilities for understanding and generating human language. While there has been a burgeoning trend in research focusing on the employment of LLMs in supporting different medical tasks (e.g., enhancing clinical diagnostics and providing medical education), a review of these efforts, particularly their&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.05112v7-abstract-full').style.display = 'inline'; document.getElementById('2311.05112v7-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.05112v7-abstract-full" style="display: none;"> Large language models (LLMs), such as ChatGPT, have received substantial attention due to their capabilities for understanding and generating human language. 
While there has been a burgeoning trend in research focusing on the employment of LLMs in supporting different medical tasks (e.g., enhancing clinical diagnostics and providing medical education), a review of these efforts, particularly their development, practical applications, and outcomes in medicine, remains scarce. Therefore, this review aims to provide a detailed overview of the development and deployment of LLMs in medicine, including the challenges and opportunities they face. In terms of development, we provide a detailed introduction to the principles of existing medical LLMs, including their basic model structures, number of parameters, and sources and scales of data used for model development. It serves as a guide for practitioners in developing medical LLMs tailored to their specific needs. In terms of deployment, we offer a comparison of the performance of different LLMs across various medical tasks, and further compare them with state-of-the-art lightweight models, aiming to provide an understanding of the advantages and limitations of LLMs in medicine. Overall, in this review, we address the following questions: 1) What are the practices for developing medical LLMs? 2) How to measure the medical task performance of LLMs in a medical setting? 3) How have medical LLMs been employed in real-world practice? 4) What challenges arise from the use of medical LLMs? and 5) How to more effectively develop and deploy medical LLMs? By answering these questions, this review aims to provide insights into the opportunities for LLMs in medicine and serve as a practical resource. 
We also maintain a regularly updated list of practical guides on medical LLMs at https://github.com/AI-in-Health/MedLLMsPracticalGuide <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.05112v7-abstract-full').style.display = 'none'; document.getElementById('2311.05112v7-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint. Version 6. Update Figures 1-5; Tables 2-3; 31 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.14209">arXiv:2310.14209</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2310.14209">pdf</a>, <a href="https://arxiv.org/format/2310.14209">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> SUT: Active Defects Probing for Transcompiler Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Qi%2C+M">Mengnan Qi</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+Y">Yufan Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+M">Maoquan Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Yao%2C+Y">Yongqiang Yao</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Z">Zihan 
Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bin Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Clement%2C+C">Colin Clement</a>, <a href="/search/cs?searchtype=author&amp;query=Sundaresan%2C+N">Neel Sundaresan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.14209v1-abstract-short" style="display: inline;"> Automatic Program translation has enormous application value and hence has been attracting significant interest from AI researchers. However, we observe that current program translation models still make elementary syntax errors, particularly, when the target language does not have syntax elements in the source language. Metrics like BLUE, CodeBLUE and computation accuracy may not expose these iss&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.14209v1-abstract-full').style.display = 'inline'; document.getElementById('2310.14209v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.14209v1-abstract-full" style="display: none;"> Automatic Program translation has enormous application value and hence has been attracting significant interest from AI researchers. However, we observe that current program translation models still make elementary syntax errors, particularly, when the target language does not have syntax elements in the source language. Metrics like BLUE, CodeBLUE and computation accuracy may not expose these issues. In this paper we introduce a new metrics for programming language translation and these metrics address these basic syntax errors. We develop a novel active defects probing suite called Syntactic Unit Tests (SUT) which includes a highly interpretable evaluation harness for accuracy and test scoring. 
Experiments have shown that even powerful models like ChatGPT still make mistakes on these basic unit tests. Specifically, compared to previous program translation task evaluation dataset, its pass rate on our unit tests has decreased by 26.15%. Further, our evaluation harness reveals syntactic element errors in which these models exhibit deficiencies. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.14209v1-abstract-full').style.display = 'none'; document.getElementById('2310.14209v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.11476">arXiv:2310.11476</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2310.11476">pdf</a>, <a href="https://arxiv.org/format/2310.11476">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Program Translation via Code Distillation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Huang%2C+Y">Yufan Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Qi%2C+M">Mengnan Qi</a>, <a href="/search/cs?searchtype=author&amp;query=Yao%2C+Y">Yongqiang Yao</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+M">Maoquan Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bin Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Clement%2C+C">Colin Clement</a>, <a 
href="/search/cs?searchtype=author&amp;query=Sundaresan%2C+N">Neel Sundaresan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.11476v1-abstract-short" style="display: inline;"> Software version migration and program translation are an important and costly part of the lifecycle of large codebases. Traditional machine translation relies on parallel corpora for supervised translation, which is not feasible for program translation due to a dearth of aligned data. Recent unsupervised neural machine translation techniques have overcome data limitations by included techniques s&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.11476v1-abstract-full').style.display = 'inline'; document.getElementById('2310.11476v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.11476v1-abstract-full" style="display: none;"> Software version migration and program translation are an important and costly part of the lifecycle of large codebases. Traditional machine translation relies on parallel corpora for supervised translation, which is not feasible for program translation due to a dearth of aligned data. Recent unsupervised neural machine translation techniques have overcome data limitations by included techniques such as back translation and low level compiler intermediate representations (IR). These methods face significant challenges due to the noise in code snippet alignment and the diversity of IRs respectively. In this paper we propose a novel model called Code Distillation (CoDist) whereby we capture the semantic and structural equivalence of code in a language agnostic intermediate representation. 
Distilled code serves as a translation pivot for any programming language, leading by construction to parallel corpora which scale to all available source code by simply applying the distillation compiler. We demonstrate that our approach achieves state-of-the-art performance on CodeXGLUE and TransCoder GeeksForGeeks translation benchmarks, with an average absolute increase of 12.7% on the TransCoder GeeksforGeeks translation benchmark compared to TransCoder-ST. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.11476v1-abstract-full').style.display = 'none'; document.getElementById('2310.11476v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.06483">arXiv:2310.06483</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2310.06483">pdf</a>, <a href="https://arxiv.org/format/2310.06483">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Variance Reduced Online Gradient Descent for Kernelized Pairwise Learning with Limited Memory </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=AlQuabeh%2C+H">Hilal AlQuabeh</a>, <a href="/search/cs?searchtype=author&amp;query=Mukhoty%2C+B">Bhaskar Mukhoty</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bin Gu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" 
id="2310.06483v1-abstract-short" style="display: inline;"> Pairwise learning is essential in machine learning, especially for problems involving loss functions defined on pairs of training examples. Online gradient descent (OGD) algorithms have been proposed to handle online pairwise learning, where data arrives sequentially. However, the pairwise nature of the problem makes scalability challenging, as the gradient computation for a new sample involves al&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.06483v1-abstract-full').style.display = 'inline'; document.getElementById('2310.06483v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.06483v1-abstract-full" style="display: none;"> Pairwise learning is essential in machine learning, especially for problems involving loss functions defined on pairs of training examples. Online gradient descent (OGD) algorithms have been proposed to handle online pairwise learning, where data arrives sequentially. However, the pairwise nature of the problem makes scalability challenging, as the gradient computation for a new sample involves all past samples. Recent advancements in OGD algorithms have aimed to reduce the complexity of calculating online gradients, achieving complexities less than $O(T)$ and even as low as $O(1)$. However, these approaches are primarily limited to linear models and have induced variance. In this study, we propose a limited memory OGD algorithm that extends to kernel online pairwise learning while improving the sublinear regret. 
Specifically, we establish a clear connection between the variance of online gradients and the regret, and construct online gradients using the most recent stratified samples with a limited buffer of size of $s$ representing all past data, which have a complexity of $O(sT)$ and employs $O(\sqrt{T}\log{T})$ random Fourier features for kernel approximation. Importantly, our theoretical results demonstrate that the variance-reduced online gradients lead to an improved sublinear regret bound. The experiments on real-world datasets demonstrate the superiority of our algorithm over both kernelized and linear online pairwise learning algorithms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.06483v1-abstract-full').style.display = 'none'; document.getElementById('2310.06483v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted in ACML2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.08965">arXiv:2309.08965</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2309.08965">pdf</a>, <a href="https://arxiv.org/format/2309.08965">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> </div> </div> <p class="title is-5 mathjax"> Multiagent Reinforcement Learning with an Attention Mechanism for Improving Energy Efficiency in LoRa Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+X">Xu Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+Z">Ziqi Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Gong%2C+S">Shimin Gong</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bo Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Niyato%2C+D">Dusit Niyato</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.08965v1-abstract-short" style="display: inline;"> Long Range (LoRa) wireless technology, characterized by low power consumption and a long communication range, is regarded as one of the enabling technologies for the Industrial Internet of Things (IIoT). However, as the network scale increases, the energy efficiency (EE) of LoRa networks decreases sharply due to severe packet collisions. 
To address this issue, it is essential to appropriately assi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.08965v1-abstract-full').style.display = 'inline'; document.getElementById('2309.08965v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.08965v1-abstract-full" style="display: none;"> Long Range (LoRa) wireless technology, characterized by low power consumption and a long communication range, is regarded as one of the enabling technologies for the Industrial Internet of Things (IIoT). However, as the network scale increases, the energy efficiency (EE) of LoRa networks decreases sharply due to severe packet collisions. To address this issue, it is essential to appropriately assign transmission parameters such as the spreading factor and transmission power for each end device (ED). However, due to the sporadic traffic and low duty cycle of LoRa networks, evaluating the system EE performance under different parameter settings is time-consuming. Therefore, we first formulate an analytical model to calculate the system EE. On this basis, we propose a transmission parameter allocation algorithm based on multiagent reinforcement learning (MALoRa) with the aim of maximizing the system EE of LoRa networks. Notably, MALoRa employs an attention mechanism to guide each ED to better learn how much &#39;&#39;attention&#39;&#39; should be given to the parameter assignments for relevant EDs when seeking to improve the system EE. Simulation results demonstrate that MALoRa significantly improves the system EE compared with baseline algorithms with an acceptable degradation in packet delivery rate (PDR). 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.08965v1-abstract-full').style.display = 'none'; document.getElementById('2309.08965v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 3 figures, This paper has been accepted for publication in IEEE Global Communications Conference (GLOBECOM) 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.16031">arXiv:2308.16031</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2308.16031">pdf</a>, <a href="https://arxiv.org/format/2308.16031">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/COMST.2024.3436082">10.1109/COMST.2024.3436082 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Breaking the Interference and Fading Gridlock in Backscatter Communications: State-of-the-Art, Design Challenges, and Future Directions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bowen Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dong Li</a>, <a href="/search/cs?searchtype=author&amp;query=Ding%2C+H">Haiyang 
Ding</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+G">Gongpu Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Tellambura%2C+C">Chintha Tellambura</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.16031v2-abstract-short" style="display: inline;"> As the Internet of Things (IoT) advances by leaps and bounds, a multitude of devices are becoming interconnected, marking the onset of an era where all things are connected. While this growth opens up opportunities for novel products and applications, it also leads to increased energy demand and battery reliance for IoT devices, creating a significant bottleneck that hinders sustainable progress.&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.16031v2-abstract-full').style.display = 'inline'; document.getElementById('2308.16031v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.16031v2-abstract-full" style="display: none;"> As the Internet of Things (IoT) advances by leaps and bounds, a multitude of devices are becoming interconnected, marking the onset of an era where all things are connected. While this growth opens up opportunities for novel products and applications, it also leads to increased energy demand and battery reliance for IoT devices, creating a significant bottleneck that hinders sustainable progress. At this juncture, backscatter communication (BackCom), as a low-power and passive communication method, emerges as one of the promising solutions to this energy impasse by reducing the manufacturing costs and energy consumption of IoT devices. 
However, BackCom systems face challenges such as complex interference environments, including direct link interference (DLI) and mutual interference (MI) between tags, which can severely disrupt the efficiency of BackCom networks. Moreover, double-path fading is another major issue that leads to degraded system performance. To fully unleash the potential of BackComs, the purpose of this paper is to furnish a comprehensive review of existing solutions with a focus on combatting these specific interference challenges and overcoming dual-path fading, offering an insightful analysis and comparison of various strategies for effectively mitigating these issues. Specifically, we begin by introducing the preliminaries for the BackCom, including its history, operating mechanisms, main architectures, etc., providing a foundational understanding of the field. Then, we delve into fundamental issues related to BackCom systems, such as solutions for the DLI, the MI, and the double-path fading. This paper thoroughly provides state-of-the-art advances for each case, particularly highlighting how the latest innovations in theoretical approaches and system design can strategically address these challenges. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.16031v2-abstract-full').style.display = 'none'; document.getElementById('2308.16031v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.16077">arXiv:2306.16077</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2306.16077">pdf</a>, <a href="https://arxiv.org/format/2306.16077">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> Secure and Fast Asynchronous Vertical Federated Learning via Cascaded Hybrid Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+G">Ganyu Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Q">Qingsong Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Xiang%2C+L">Li Xiang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+B">Boyu Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bin Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Ling%2C+C">Charles Ling</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.16077v2-abstract-short" style="display: inline;"> Vertical Federated Learning (VFL) attracts increasing attention because it empowers multiple parties to jointly train a privacy-preserving model over vertically partitioned data. Recent research has shown that applying zeroth-order optimization (ZOO) has many advantages in building a practical VFL algorithm. 
However, a vital problem with the ZOO-based VFL is its slow convergence rate, which limits&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.16077v2-abstract-full').style.display = 'inline'; document.getElementById('2306.16077v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.16077v2-abstract-full" style="display: none;"> Vertical Federated Learning (VFL) attracts increasing attention because it empowers multiple parties to jointly train a privacy-preserving model over vertically partitioned data. Recent research has shown that applying zeroth-order optimization (ZOO) has many advantages in building a practical VFL algorithm. However, a vital problem with the ZOO-based VFL is its slow convergence rate, which limits its application in handling modern large models. To address this problem, we propose a cascaded hybrid optimization method in VFL. In this method, the downstream models (clients) are trained with ZOO to protect privacy and ensure that no internal information is shared. Meanwhile, the upstream model (server) is updated with first-order optimization (FOO) locally, which significantly improves the convergence rate, making it feasible to train the large models without compromising privacy and security. We theoretically prove that our VFL framework converges faster than the ZOO-based VFL, as the convergence of our framework is not limited by the size of the server model, making it effective for training large models with the major part on the server. Extensive experiments demonstrate that our method achieves faster convergence than the ZOO-based VFL framework, while maintaining an equivalent level of privacy protection. Moreover, we show that the convergence of our VFL is comparable to the unsafe FOO-based VFL baseline. Additionally, we demonstrate that our method makes the training of a large model feasible. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.16077v2-abstract-full').style.display = 'none'; document.getElementById('2306.16077v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Under Review</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.13874">arXiv:2306.13874</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2306.13874">pdf</a>, <a href="https://arxiv.org/format/2306.13874">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Enhancing Spectrum Sensing via Reconfigurable Intelligent Surfaces: Passive or Active Sensing and How Many Reflecting Elements are Needed? 
</p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xie%2C+H">Hao Xie</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dong Li</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bowen Gu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.13874v2-abstract-short" style="display: inline;"> Cognitive radio has been proposed to alleviate the scarcity of available spectrum caused by the significant demand for wideband services and the fragmentation of spectrum resources. However, sensing performance is quite poor due to the low sensing signal-to-noise ratio, especially in complex environments with severe channel fading. Fortunately, reconfigurable intelligent surface (RIS)-aided spectr&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.13874v2-abstract-full').style.display = 'inline'; document.getElementById('2306.13874v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.13874v2-abstract-full" style="display: none;"> Cognitive radio has been proposed to alleviate the scarcity of available spectrum caused by the significant demand for wideband services and the fragmentation of spectrum resources. However, sensing performance is quite poor due to the low sensing signal-to-noise ratio, especially in complex environments with severe channel fading. Fortunately, reconfigurable intelligent surface (RIS)-aided spectrum sensing can effectively tackle the above challenge due to its high array gain. Nevertheless, the traditional passive RIS may suffer from the ``double fading&#39;&#39; effect, which severely limits the performance of passive RIS-aided spectrum sensing. 
Thus, a crucial challenge is how to fully exploit the potential advantages of the RIS and further improve the sensing performance. To this end, we introduce the active RIS into spectrum sensing and respectively formulate two optimization problems for the passive RIS and the active RIS to maximize the detection probability. In light of the intractability of the formulated problems, we develop a one-stage optimization algorithm with inner approximation and a two-stage optimization algorithm with a bisection method to obtain sub-optimal solutions, and apply the Rayleigh quotient to obtain the upper and lower bounds of the detection probability. Furthermore, in order to gain more insight into the impact of the RIS on spectrum sensing, we respectively investigate the number configuration for passive RIS and active RIS and analyze how many reflecting elements are needed to achieve the detection probability close to 1. Simulation results verify that the proposed algorithms outperform existing algorithms under the same parameter configuration, and achieve a detection probability close to 1 with even fewer reflecting elements or antennas than existing schemes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.13874v2-abstract-full').style.display = 'none'; document.getElementById('2306.13874v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.05751">arXiv:2306.05751</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2306.05751">pdf</a>, <a href="https://arxiv.org/format/2306.05751">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Methodology">stat.ME</span> </div> </div> <p class="title is-5 mathjax"> Advancing Counterfactual Inference through Nonlinear Quantile Regression </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xie%2C+S">Shaoan Xie</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+B">Biwei Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bin Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+T">Tongliang Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+K">Kun Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.05751v3-abstract-short" style="display: inline;"> The capacity to address counterfactual &#34;what if&#34; inquiries is crucial for understanding and making use of causal influences. Traditional counterfactual inference, under Pearls&#39; counterfactual framework, typically depends on having access to or estimating a structural causal model. Yet, in practice, this causal model is often unknown and might be challenging to identify. 
Hence, this paper aims to p&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.05751v3-abstract-full').style.display = 'inline'; document.getElementById('2306.05751v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.05751v3-abstract-full" style="display: none;"> The capacity to address counterfactual &#34;what if&#34; inquiries is crucial for understanding and making use of causal influences. Traditional counterfactual inference, under Pearls&#39; counterfactual framework, typically depends on having access to or estimating a structural causal model. Yet, in practice, this causal model is often unknown and might be challenging to identify. Hence, this paper aims to perform reliable counterfactual inference based solely on observational data and the (learned) qualitative causal structure, without necessitating a predefined causal model or even direct estimations of conditional distributions. To this end, we establish a novel connection between counterfactual inference and quantile regression and show that counterfactual inference can be reframed as an extended quantile regression problem. Building on this insight, we propose a practical framework for efficient and effective counterfactual inference implemented with neural networks under a bi-level optimization scheme. The proposed approach enhances the capacity to generalize estimated counterfactual outcomes to unseen data, thereby providing an upper bound on the generalization error. Furthermore, empirical evidence demonstrates its superior statistical efficiency in comparison to existing methods. Empirical results conducted on multiple datasets offer compelling support for our theoretical assertions. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.05751v3-abstract-full').style.display = 'none'; document.getElementById('2306.05751v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.01260">arXiv:2306.01260</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2306.01260">pdf</a>, <a href="https://arxiv.org/format/2306.01260">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> FREPA: An Automated and Formal Approach to Requirement Modeling and Analysis in Aircraft Control Domain </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Feng%2C+J">Jincao Feng</a>, <a href="/search/cs?searchtype=author&amp;query=Miao%2C+W">Weikai Miao</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+H">Hanyue Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+Y">Yihao Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jianwen Li</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Z">Zheng Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Su%2C+T">Ting Su</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bin Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Pu%2C+G">Geguang Pu</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+M">Mengfei Yang</a>, <a 
href="/search/cs?searchtype=author&amp;query=He%2C+J">Jifeng He</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.01260v1-abstract-short" style="display: inline;"> Formal methods are promising for modeling and analyzing system requirements. However, applying formal methods to large-scale industrial projects is a remaining challenge. The industrial engineers are suffering from the lack of automated engineering methodologies to effectively conduct precise requirement models, and rigorously validate and verify (V&amp;V) the generated models. To tackle this challeng&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.01260v1-abstract-full').style.display = 'inline'; document.getElementById('2306.01260v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.01260v1-abstract-full" style="display: none;"> Formal methods are promising for modeling and analyzing system requirements. However, applying formal methods to large-scale industrial projects is a remaining challenge. The industrial engineers are suffering from the lack of automated engineering methodologies to effectively conduct precise requirement models, and rigorously validate and verify (V&amp;V) the generated models. To tackle this challenge, in this paper, we present a systematic engineering approach, named Formal Requirement Engineering Platform in Aircraft (FREPA), for formal requirement modeling and V&amp;V in the aerospace and aviation control domains. FREPA is an outcome of the seamless collaboration between the academy and industry over the last eight years. 
The main contributions of this paper include 1) an automated and systematic engineering approach FREPA to construct requirement models, validate and verify systems in the aerospace and aviation control domain, 2) a domain-specific modeling language AASRDL to describe the formal specification, and 3) a practical FREPA-based tool AeroReq which has been used by our industry partners. We have successfully adopted FREPA to seven real aerospace gesture control and two aviation engine control systems. The experimental results show that FREPA and the corresponding tool AeroReq significantly facilitate formal modeling and V&amp;V in the industry. Moreover, we also discuss the experiences and lessons gained from using FREPA in aerospace and aviation projects. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.01260v1-abstract-full').style.display = 'none'; document.getElementById('2306.01260v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, Published by FSE 2020</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.09946">arXiv:2305.09946</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2305.09946">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1038/s41698-024-00690-y">10.1038/s41698-024-00690-y <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> AdaMSS: Adaptive Multi-Modality Segmentation-to-Survival Learning for Survival Outcome Prediction from PET/CT Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Meng%2C+M">Mingyuan Meng</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bingxin Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Fulham%2C+M">Michael Fulham</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+S">Shaoli Song</a>, <a href="/search/cs?searchtype=author&amp;query=Feng%2C+D">Dagan Feng</a>, <a href="/search/cs?searchtype=author&amp;query=Bi%2C+L">Lei Bi</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+J">Jinman Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.09946v3-abstract-short" style="display: inline;"> Survival prediction is a major concern for cancer management. Deep survival models based on deep learning have been widely adopted to perform end-to-end survival prediction from medical images. Recent deep survival models achieved promising performance by jointly performing tumor segmentation with survival prediction, where the models were guided to extract tumor-related information through Multi-&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.09946v3-abstract-full').style.display = 'inline'; document.getElementById('2305.09946v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.09946v3-abstract-full" style="display: none;"> Survival prediction is a major concern for cancer management. Deep survival models based on deep learning have been widely adopted to perform end-to-end survival prediction from medical images. Recent deep survival models achieved promising performance by jointly performing tumor segmentation with survival prediction, where the models were guided to extract tumor-related information through Multi-Task Learning (MTL). However, these deep survival models have difficulties in exploring out-of-tumor prognostic information. In addition, existing deep survival models are unable to effectively leverage multi-modality images. Empirically-designed fusion strategies were commonly adopted to fuse multi-modality information via task-specific manually-designed networks, thus limiting the adaptability to different scenarios. In this study, we propose an Adaptive Multi-modality Segmentation-to-Survival model (AdaMSS) for survival prediction from PET/CT images. 
Instead of adopting MTL, we propose a novel Segmentation-to-Survival Learning (SSL) strategy, where our AdaMSS is trained for tumor segmentation and survival prediction sequentially in two stages. This strategy enables the AdaMSS to focus on tumor regions in the first stage and gradually expand its focus to include other prognosis-related regions in the second stage. We also propose a data-driven strategy to fuse multi-modality information, which realizes adaptive optimization of fusion strategies based on training data during training. With the SSL and data-driven fusion strategies, our AdaMSS is designed as an adaptive model that can self-adapt its focus regions and fusion strategy for different training stages. Extensive experiments with two large clinical datasets show that our AdaMSS outperforms state-of-the-art survival prediction methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.09946v3-abstract-full').style.display = 'none'; document.getElementById('2305.09946v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The extended version of this paper has been published at npj Precision Oncology as &#34;Adaptive segmentation-to-survival learning for survival prediction from multi-modality medical images&#34;</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> npj Precision Oncology, vol. 8, p. 
232, 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.06094">arXiv:2305.06094</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2305.06094">pdf</a>, <a href="https://arxiv.org/format/2305.06094">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TVT.2023.3348200">10.1109/TVT.2023.3348200 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Computation-Efficient Backscatter-Blessed MEC with User Reciprocity </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bowen Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Xie%2C+H">Hao Xie</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dong Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.06094v1-abstract-short" style="display: inline;"> This letter proposes a new user cooperative offloading protocol called user reciprocity in backscatter communication (BackCom)-aided mobile edge computing systems with efficient computation, whose quintessence is that each user can switch alternately between the active or the BackCom mode in different slots, and one user works in the active mode and the other user works in the BackCom mode in each&hellip; <a class="is-size-7" style="white-space: nowrap;" 
onclick="document.getElementById('2305.06094v1-abstract-full').style.display = 'inline'; document.getElementById('2305.06094v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.06094v1-abstract-full" style="display: none;"> This letter proposes a new user cooperative offloading protocol called user reciprocity in backscatter communication (BackCom)-aided mobile edge computing systems with efficient computation, whose quintessence is that each user can switch alternately between the active or the BackCom mode in different slots, and one user works in the active mode and the other user works in the BackCom mode in each time slot. In particular, the user in the BackCom mode can always use the signal transmitted by the user in the active mode for more data transmission in a spectrum-sharing manner. To evaluate the proposed protocol, a computation efficiency (CE) maximization-based optimization problem is formulated by jointly power control, time scheduling, reflection coefficient adjustment, and computing frequency allocation, while satisfying various physical constraints on the maximum energy budget, the computing frequency threshold, the minimum computed bits, and harvested energy threshold. To solve this non-convex problem, Dinkelbach&#39;s method and quadratic transform are first employed to transform the complex fractional forms into linear ones. Then, an iterative algorithm is designed by decomposing the resulting problem to obtain the suboptimal solution. The closed-form solutions for the transmit power, the RC, and the local computing frequency are provided for more insights. Besides, the analytical performance gain with the reciprocal mode is also derived. Simulation results demonstrate that the proposed scheme outperforms benchmark schemes regarding the CE. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.06094v1-abstract-full').style.display = 'none'; document.getElementById('2305.06094v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2304.11335">arXiv:2304.11335</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2304.11335">pdf</a>, <a href="https://arxiv.org/format/2304.11335">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Two Birds, One Stone: A Unified Framework for Joint Learning of Image and Video Style Transfers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bohai Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Fan%2C+H">Heng Fan</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+L">Libo Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2304.11335v2-abstract-short" style="display: inline;"> Current arbitrary style transfer models are limited to either image or video domains. In order to achieve satisfying image and video style transfers, two different models are inevitably required with separate training processes on image and video domains, respectively. 
In this paper, we show that this can be precluded by introducing UniST, a Unified Style Transfer framework for both images and vid&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.11335v2-abstract-full').style.display = 'inline'; document.getElementById('2304.11335v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2304.11335v2-abstract-full" style="display: none;"> Current arbitrary style transfer models are limited to either image or video domains. In order to achieve satisfying image and video style transfers, two different models are inevitably required with separate training processes on image and video domains, respectively. In this paper, we show that this can be precluded by introducing UniST, a Unified Style Transfer framework for both images and videos. At the core of UniST is a domain interaction transformer (DIT), which first explores context information within the specific domain and then interacts contextualized domain information for joint learning. In particular, DIT enables exploration of temporal information from videos for the image style transfer task and meanwhile allows rich appearance texture from images for video style transfer, thus leading to mutual benefits. Considering heavy computation of traditional multi-head self-attention, we present a simple yet effective axial multi-head self-attention (AMSA) for DIT, which improves computational efficiency while maintains style transfer performance. To verify the effectiveness of UniST, we conduct extensive experiments on both image and video style transfer tasks and show that UniST performs favorably against state-of-the-art approaches on both tasks. Code is available at https://github.com/NevSNev/UniST. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.11335v2-abstract-full').style.display = 'none'; document.getElementById('2304.11335v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Conference on International Conference on Computer Vision.(ICCV 2023)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.01249">arXiv:2303.01249</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2303.01249">pdf</a>, <a href="https://arxiv.org/ps/2303.01249">ps</a>, <a href="https://arxiv.org/format/2303.01249">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Language-Universal Adapter Learning with Knowledge Distillation for End-to-End Multilingual Speech Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Shen%2C+Z">Zhijie Shen</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+W">Wu Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bin Gu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.01249v1-abstract-short" style="display: inline;"> In this paper, we propose a language-universal adapter learning framework based on a pre-trained model for end-to-end multilingual automatic speech recognition (ASR). For acoustic modeling, the wav2vec 2.0 pre-trained model is fine-tuned by inserting language-specific and language-universal adapters. An online knowledge distillation is then used to enable the language-universal adapters to learn b&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.01249v1-abstract-full').style.display = 'inline'; document.getElementById('2303.01249v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.01249v1-abstract-full" style="display: none;"> In this paper, we propose a language-universal adapter learning framework based on a pre-trained model for end-to-end multilingual automatic speech recognition (ASR). For acoustic modeling, the wav2vec 2.0 pre-trained model is fine-tuned by inserting language-specific and language-universal adapters. An online knowledge distillation is then used to enable the language-universal adapters to learn both language-specific and universal features. The linguistic information confusion is also reduced by leveraging language identifiers (LIDs). With LIDs we perform a position-wise modification on the multi-head attention outputs. In the inference procedure, the language-specific adapters are removed while the language-universal adapters are kept activated. The proposed method improves the recognition accuracy and addresses the linear increase of the number of adapters&#39; parameters with the number of languages in common multilingual ASR systems. Experiments on the BABEL dataset confirm the effectiveness of the proposed framework. 
Compared to the conventional multilingual model, a 3.3% absolute error rate reduction is achieved. The code is available at: https://github.com/shen9712/UniversalAdapterLearning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.01249v1-abstract-full').style.display = 'none'; document.getElementById('2303.01249v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.09967">arXiv:2302.09967</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2302.09967">pdf</a>, <a href="https://arxiv.org/format/2302.09967">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Stability-based Generalization Analysis for Mixtures of Pointwise and Pairwise Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+J">Jiahuan Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+J">Jun Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+H">Hong Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bin Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+W">Weifu Li</a>, <a href="/search/cs?searchtype=author&amp;query=Tang%2C+X">Xin Tang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" 
id="2302.09967v1-abstract-short" style="display: inline;"> Recently, some mixture algorithms of pointwise and pairwise learning (PPL) have been formulated by employing the hybrid error metric of &#34;pointwise loss + pairwise loss&#34; and have shown empirical effectiveness on feature selection, ranking and recommendation tasks. However, to the best of our knowledge, the learning theory foundation of PPL has not been touched in the existing works. In this paper,&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.09967v1-abstract-full').style.display = 'inline'; document.getElementById('2302.09967v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.09967v1-abstract-full" style="display: none;"> Recently, some mixture algorithms of pointwise and pairwise learning (PPL) have been formulated by employing the hybrid error metric of &#34;pointwise loss + pairwise loss&#34; and have shown empirical effectiveness on feature selection, ranking and recommendation tasks. However, to the best of our knowledge, the learning theory foundation of PPL has not been touched in the existing works. In this paper, we try to fill this theoretical gap by investigating the generalization properties of PPL. After extending the definitions of algorithmic stability to the PPL setting, we establish the high-probability generalization bounds for uniformly stable PPL algorithms. Moreover, explicit convergence rates of stochastic gradient descent (SGD) and regularized risk minimization (RRM) for PPL are stated by developing the stability analysis technique of pairwise learning. In addition, the refined generalization bounds of PPL are obtained by replacing uniform stability with on-average stability. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.09967v1-abstract-full').style.display = 'none'; document.getElementById('2302.09967v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">20 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.09815">arXiv:2302.09815</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2302.09815">pdf</a>, <a href="https://arxiv.org/format/2302.09815">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> On the Stability and Generalization of Triplet Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+J">Jun Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+H">Hong Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+X">Xue Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bin Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+W">Weifu Li</a>, <a href="/search/cs?searchtype=author&amp;query=Gong%2C+T">Tieliang Gong</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+F">Feng Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span 
class="abstract-short has-text-grey-dark mathjax" id="2302.09815v1-abstract-short" style="display: inline;"> Triplet learning, i.e. learning from triplet data, has attracted much attention in computer vision tasks with an extremely large number of categories, e.g., face recognition and person re-identification. Albeit with rapid progress in designing and applying triplet learning algorithms, there is a lacking study on the theoretical understanding of their generalization performance. To fill this gap, t&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.09815v1-abstract-full').style.display = 'inline'; document.getElementById('2302.09815v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.09815v1-abstract-full" style="display: none;"> Triplet learning, i.e. learning from triplet data, has attracted much attention in computer vision tasks with an extremely large number of categories, e.g., face recognition and person re-identification. Albeit with rapid progress in designing and applying triplet learning algorithms, there is a lacking study on the theoretical understanding of their generalization performance. To fill this gap, this paper investigates the generalization guarantees of triplet learning by leveraging the stability analysis. Specifically, we establish the first general high-probability generalization bound for the triplet learning algorithm satisfying the uniform stability, and then obtain the excess risk bounds of the order $O(n^{-\frac{1}{2}} \mathrm{log}n)$ for both stochastic gradient descent (SGD) and regularized risk minimization (RRM), where $2n$ is approximately equal to the number of training samples. Moreover, an optimistic generalization bound in expectation as fast as $O(n^{-1})$ is derived for RRM in a low noise case via the on-average stability analysis. 
Finally, our results are applied to triplet metric learning to characterize its theoretical underpinning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.09815v1-abstract-full').style.display = 'none'; document.getElementById('2302.09815v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">AAAI2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.00910">arXiv:2302.00910</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2302.00910">pdf</a>, <a href="https://arxiv.org/format/2302.00910">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Energy Efficient Training of SNN using Local Zeroth Order Method </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Mukhoty%2C+B">Bhaskar Mukhoty</a>, <a href="/search/cs?searchtype=author&amp;query=Bojkovic%2C+V">Velibor Bojkovic</a>, <a href="/search/cs?searchtype=author&amp;query=de+Vazelhes%2C+W">William de Vazelhes</a>, <a href="/search/cs?searchtype=author&amp;query=De+Masi%2C+G">Giulia De Masi</a>, <a href="/search/cs?searchtype=author&amp;query=Xiong%2C+H">Huan Xiong</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bin Gu</a> </p> <p class="abstract mathjax"> <span 
class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2302.00910v2-abstract-short" style="display: inline;"> Spiking neural networks are becoming increasingly popular for their low energy requirement in real-world tasks with accuracy comparable to the traditional ANNs. SNN training algorithms face the loss of gradient information and non-differentiability due to the Heaviside function in minimizing the model loss over model parameters. To circumvent the problem surrogate method uses a differentiable appr&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.00910v2-abstract-full').style.display = 'inline'; document.getElementById('2302.00910v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.00910v2-abstract-full" style="display: none;"> Spiking neural networks are becoming increasingly popular for their low energy requirement in real-world tasks with accuracy comparable to the traditional ANNs. SNN training algorithms face the loss of gradient information and non-differentiability due to the Heaviside function in minimizing the model loss over model parameters. To circumvent the problem surrogate method uses a differentiable approximation of the Heaviside in the backward pass, while the forward pass uses the Heaviside as the spiking function. We propose to use the zeroth order technique at the neuron level to resolve this dichotomy and use it within the automatic differentiation tool. As a result, we establish a theoretical connection between the proposed local zeroth-order technique and the existing surrogate methods and vice-versa. The proposed method naturally lends itself to energy-efficient training of SNNs on GPUs. 
Experimental results with neuromorphic datasets show that such implementation requires less than 1 percent neurons to be active in the backward pass, resulting in a 100x speed-up in the backward computation time. Our method offers better generalization compared to the state-of-the-art energy-efficient technique while maintaining similar efficiency. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.00910v2-abstract-full').style.display = 'none'; document.getElementById('2302.00910v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2212.13390">arXiv:2212.13390</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2212.13390">pdf</a>, <a href="https://arxiv.org/format/2212.13390">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Hierarchical Deep Reinforcement Learning for Age-of-Information Minimization in IRS-aided and Wireless-powered Wireless Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gong%2C+S">Shimin Gong</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+L">Leiyang Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bo Gu</a>, <a 
href="/search/cs?searchtype=author&amp;query=Lyu%2C+B">Bin Lyu</a>, <a href="/search/cs?searchtype=author&amp;query=Hoang%2C+D+T">Dinh Thai Hoang</a>, <a href="/search/cs?searchtype=author&amp;query=Niyato%2C+D">Dusit Niyato</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2212.13390v1-abstract-short" style="display: inline;"> In this paper, we focus on a wireless-powered sensor network coordinated by a multi-antenna access point (AP). Each node can generate sensing information and report the latest information to the AP using the energy harvested from the AP&#39;s signal beamforming. We aim to minimize the average age-of-information (AoI) by adapting the nodes&#39; transmission scheduling and the transmission control strategie&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.13390v1-abstract-full').style.display = 'inline'; document.getElementById('2212.13390v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2212.13390v1-abstract-full" style="display: none;"> In this paper, we focus on a wireless-powered sensor network coordinated by a multi-antenna access point (AP). Each node can generate sensing information and report the latest information to the AP using the energy harvested from the AP&#39;s signal beamforming. We aim to minimize the average age-of-information (AoI) by adapting the nodes&#39; transmission scheduling and the transmission control strategies jointly. To reduce the transmission delay, an intelligent reflecting surface (IRS) is used to enhance the channel conditions by controlling the AP&#39;s beamforming vector and the IRS&#39;s phase shifting matrix. 
Considering dynamic data arrivals at different sensing nodes, we propose a hierarchical deep reinforcement learning (DRL) framework for AoI minimization in two steps. The users&#39; transmission scheduling is firstly determined by the outer-loop DRL approach, e.g. the DQN or PPO algorithm, and then the inner-loop optimization is used to adapt either the uplink information transmission or downlink energy transfer to all nodes. A simple and efficient approximation is also proposed to reduce the inner-loop run time overhead. Numerical results verify that the hierarchical learning framework outperforms typical baselines in terms of the average AoI and proportional fairness among different nodes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.13390v1-abstract-full').style.display = 'none'; document.getElementById('2212.13390v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">31 pages, 6 figures, 2 tables, 3 algorithms</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2212.08298">arXiv:2212.08298</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2212.08298">pdf</a>, <a href="https://arxiv.org/format/2212.08298">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Exploring Hybrid Active-Passive RIS-Aided MEC Systems: From the Mode-Switching Perspective </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xie%2C+H">Hao Xie</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dong Li</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bowen Gu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2212.08298v3-abstract-short" style="display: inline;"> Mobile edge computing (MEC) has been regarded as a promising technique to support latency-sensitive and computation-intensive services. However, the low offloading rate caused by the random channel fading characteristic becomes a major bottleneck in restricting the performance of the MEC. 
Fortunately, reconfigurable intelligent surface (RIS) can alleviate this problem since it can boost both the sp&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.08298v3-abstract-full').style.display = 'inline'; document.getElementById('2212.08298v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2212.08298v3-abstract-full" style="display: none;"> Mobile edge computing (MEC) has been regarded as a promising technique to support latency-sensitive and computation-intensive services. However, the low offloading rate caused by the random channel fading characteristic becomes a major bottleneck in restricting the performance of the MEC. Fortunately, reconfigurable intelligent surface (RIS) can alleviate this problem since it can boost both the spectrum- and energy-efficiency. Different from the existing works adopting either fully active or fully passive RIS, we propose a novel hybrid RIS in which reflecting units can flexibly switch between active and passive modes. To achieve a tradeoff between the latency and energy consumption, an optimization problem is formulated by minimizing the total cost. In light of the intractability of the problem, we develop an alternating optimization-based iterative algorithm by combining the successive convex approximation method, the variable substitution, and the singular value decomposition (SVD) to obtain sub-optimal solutions. Furthermore, in order to gain more insight into the problem, we consider two special cases involving a latency minimization problem and an energy consumption minimization problem, and respectively analyze the tradeoff between the number of active and passive units. Simulation results verify that the proposed algorithm can achieve flexible mode switching and significantly outperforms existing algorithms. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.08298v3-abstract-full').style.display = 'none'; document.getElementById('2212.08298v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.11751">arXiv:2211.11751</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2211.11751">pdf</a>, <a href="https://arxiv.org/format/2211.11751">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Denoising Multi-Similarity Formulation: A Self-paced Curriculum-Driven Approach for Robust Metric Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+C">Chenkang Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Luo%2C+L">Lei Luo</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bin Gu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.11751v2-abstract-short" style="display: inline;"> Deep Metric Learning (DML) is a group of techniques that aim to measure the similarity between objects through the neural network. 
Although the number of DML methods has rapidly increased in recent years, most previous studies cannot effectively handle noisy data, which commonly exists in practical applications and often leads to serious performance deterioration. To overcome this limitation, in t&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.11751v2-abstract-full').style.display = 'inline'; document.getElementById('2211.11751v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.11751v2-abstract-full" style="display: none;"> Deep Metric Learning (DML) is a group of techniques that aim to measure the similarity between objects through the neural network. Although the number of DML methods has rapidly increased in recent years, most previous studies cannot effectively handle noisy data, which commonly exists in practical applications and often leads to serious performance deterioration. To overcome this limitation, in this paper, we build a connection between noisy samples and hard samples in the framework of self-paced learning, and propose a \underline{B}alanced \underline{S}elf-\underline{P}aced \underline{M}etric \underline{L}earning (BSPML) algorithm with a denoising multi-similarity formulation, where noisy samples are treated as extremely hard samples and adaptively excluded from the model training by sample weighting. Especially, due to the pairwise relationship and a new balance regularization term, the sub-problem \emph{w.r.t.} sample weights is a nonconvex quadratic function. To efficiently solve this nonconvex quadratic problem, we propose a doubly stochastic projection coordinate gradient algorithm. Importantly, we theoretically prove the convergence not only for the doubly stochastic projection coordinate gradient algorithm, but also for our BSPML algorithm. 
Experimental results on several standard data sets demonstrate that our BSPML algorithm has better generalization ability and robustness than the state-of-the-art robust DML approaches. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.11751v2-abstract-full').style.display = 'none'; document.getElementById('2211.11751v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.05279">arXiv:2210.05279</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2210.05279">pdf</a>, <a href="https://arxiv.org/format/2210.05279">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> </div> </div> <p class="title is-5 mathjax"> Zeroth-Order Hard-Thresholding: Gradient Error vs. 
Expansivity </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=de+Vazelhes%2C+W">William de Vazelhes</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+H">Hualin Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+H">Huimin Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Yuan%2C+X">Xiao-Tong Yuan</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bin Gu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.05279v2-abstract-short" style="display: inline;"> $\ell_0&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.05279v2-abstract-full').style.display = 'inline'; document.getElementById('2210.05279v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.05279v2-abstract-full" style="display: none;"> $\ell_0$ constrained optimization is prevalent in machine learning, particularly for high-dimensional problems, because it is a fundamental approach to achieve sparse learning. Hard-thresholding gradient descent is a dominant technique to solve this problem. However, first-order gradients of the objective function may be either unavailable or expensive to calculate in a lot of real-world problems, where zeroth-order (ZO) gradients could be a good surrogate. Unfortunately, whether ZO gradients can work with the hard-thresholding operator is still an unsolved problem. To solve this puzzle, in this paper, we focus on the $\ell_0$ constrained black-box stochastic optimization problems, and propose a new stochastic zeroth-order gradient hard-thresholding (SZOHT) algorithm with a general ZO gradient estimator powered by a novel random support sampling. We provide the convergence analysis of SZOHT under standard assumptions. 
Importantly, we reveal a conflict between the deviation of ZO estimators and the expansivity of the hard-thresholding operator, and provide a theoretical minimal value of the number of random directions in ZO gradients. In addition, we find that the query complexity of SZOHT is independent or weakly dependent on the dimensionality under different settings. Finally, we illustrate the utility of our method on a portfolio optimization problem as well as black-box adversarial attacks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.05279v2-abstract-full').style.display = 'none'; document.getElementById('2210.05279v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted for publication at NeurIPS 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.03674">arXiv:2210.03674</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2210.03674">pdf</a>, <a href="https://arxiv.org/format/2210.03674">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Reinforcement Learning Approach for Multi-Agent Flexible Scheduling Problems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+H">Hongjian Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Boyang Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Jin%2C+C">Chenghao Jin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.03674v1-abstract-short" style="display: inline;"> Scheduling plays an important role in automated production. Its impact can be found in various fields such as the manufacturing industry, the service industry and the technology industry. A scheduling problem (NP-hard) is a task of finding a sequence of job assignments on a given set of machines with the goal of optimizing the objective defined. 
Methods such as Operation Research, Dispatching Rule&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.03674v1-abstract-full').style.display = 'inline'; document.getElementById('2210.03674v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.03674v1-abstract-full" style="display: none;"> Scheduling plays an important role in automated production. Its impact can be found in various fields such as the manufacturing industry, the service industry and the technology industry. A scheduling problem (NP-hard) is a task of finding a sequence of job assignments on a given set of machines with the goal of optimizing the objective defined. Methods such as Operation Research, Dispatching Rules, and Combinatorial Optimization have been applied to scheduling problems but no solution guarantees to find the optimal solution. The recent development of Reinforcement Learning has shown success in sequential decision-making problems. This research presents a Reinforcement Learning approach for scheduling problems. In particular, this study delivers an OpenAI gym environment with search-space reduction for Job Shop Scheduling Problems and provides a heuristic-guided Q-Learning solution with state-of-the-art performance for Multi-agent Flexible Job Shop Problems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.03674v1-abstract-full').style.display = 'none'; document.getElementById('2210.03674v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.01496">arXiv:2210.01496</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2210.01496">pdf</a>, <a href="https://arxiv.org/format/2210.01496">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Zeroth-Order Negative Curvature Finding: Escaping Saddle Points without Gradients </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+H">Hualin Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Xiong%2C+H">Huan Xiong</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bin Gu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.01496v1-abstract-short" style="display: inline;"> We consider escaping saddle points of nonconvex problems where only the function evaluations can be accessed. 
Although a variety of works have been proposed, the majority of them require either second or first-order information, and only a few of them have exploited zeroth-order methods, particularly the technique of negative curvature finding with zeroth-order methods which has been proven to be&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.01496v1-abstract-full').style.display = 'inline'; document.getElementById('2210.01496v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.01496v1-abstract-full" style="display: none;"> We consider escaping saddle points of nonconvex problems where only the function evaluations can be accessed. Although a variety of works have been proposed, the majority of them require either second or first-order information, and only a few of them have exploited zeroth-order methods, particularly the technique of negative curvature finding with zeroth-order methods which has been proven to be the most efficient method for escaping saddle points. To fill this gap, in this paper, we propose two zeroth-order negative curvature finding frameworks that can replace Hessian-vector product computations without increasing the iteration complexity. We apply the proposed frameworks to ZO-GD, ZO-SGD, ZO-SCSG, ZO-SPIDER and prove that these ZO algorithms can converge to $(ε,δ)$-approximate second-order stationary points with less query complexity compared with prior zeroth-order works for finding local minima. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.01496v1-abstract-full').style.display = 'none'; document.getElementById('2210.01496v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2209.13100">arXiv:2209.13100</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2209.13100">pdf</a>, <a href="https://arxiv.org/format/2209.13100">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Gain without Pain: Recycling Reflected Energy from Wireless Powered RIS-aided Communications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xie%2C+H">Hao Xie</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bowen Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dong Li</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+Z">Zhi Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+Y">Yongjun Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2209.13100v1-abstract-short" style="display: inline;"> In this paper, we investigate and analyze energy recycling for a reconfigurable intelligent surface (RIS)-aided wireless-powered communication network. 
As opposed to the existing works where the energy harvested by Internet of things (IoT) devices only come from the power station, IoT devices are also allowed to recycle energy from other IoT devices. In particular, we propose group switching- and&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.13100v1-abstract-full').style.display = 'inline'; document.getElementById('2209.13100v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2209.13100v1-abstract-full" style="display: none;"> In this paper, we investigate and analyze energy recycling for a reconfigurable intelligent surface (RIS)-aided wireless-powered communication network. As opposed to the existing works where the energy harvested by Internet of things (IoT) devices only come from the power station, IoT devices are also allowed to recycle energy from other IoT devices. In particular, we propose group switching- and user switching-based protocols with time-division multiple access to evaluate the impact of energy recycling on system performance. Two different optimization problems are respectively formulated for maximizing the sum throughput by jointly optimizing the energy beamforming vectors, the transmit power, the transmission time, the receive beamforming vectors, the grouping factors, and the phase-shift matrices, where the constraints of the minimum throughput, the harvested energy, the maximum transmit power, the phase shift, the grouping, and the time allocation are taken into account. In light of the intractability of the above problems, we respectively develop two alternating optimization-based iterative algorithms by combining the successive convex approximation method and the penalty-based method to obtain corresponding sub-optimal solutions. 
Simulation results verify that the energy recycling-based mechanism can assist in enhancing the performance of IoT devices in terms of energy harvesting and information transmission. Besides, we also verify that the group switching-based algorithm can improve more sum throughput of IoT devices, and the user switching-based algorithm can harvest more energy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.13100v1-abstract-full').style.display = 'none'; document.getElementById('2209.13100v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2209.12526">arXiv:2209.12526</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2209.12526">pdf</a>, <a href="https://arxiv.org/format/2209.12526">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Exploiting Hybrid Active and Passive Multiple Access via Slotted ALOHA-Driven Backscatter Communications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bowen Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Xie%2C+H">Hao Xie</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dong Li</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Y">Ye Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+Y">Yongjun Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark 
mathjax" id="2209.12526v2-abstract-short" style="display: inline;"> In conventional backscatter communication (BackCom) systems, time division multiple access (TDMA) and frequency division multiple access (FDMA) are generally adopted for multiuser backscattering due to their simplicity in implementation. However, as the number of backscatter devices (BDs) proliferates, there will be a high overhead under the traditional centralized control techniques, and the inte&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.12526v2-abstract-full').style.display = 'inline'; document.getElementById('2209.12526v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2209.12526v2-abstract-full" style="display: none;"> In conventional backscatter communication (BackCom) systems, time division multiple access (TDMA) and frequency division multiple access (FDMA) are generally adopted for multiuser backscattering due to their simplicity in implementation. However, as the number of backscatter devices (BDs) proliferates, there will be a high overhead under the traditional centralized control techniques, and the inter-user coordination is unaffordable for the passive BDs, which are of scarce concern in existing works and remain unsolved. To this end, in this paper, we propose a slotted ALOHA-based random access for BackCom systems, in which each BD is randomly chosen and is allowed to coexist with one active device for hybrid multiple access. To excavate and evaluate the performance, a resource allocation problem for max-min transmission rate is formulated, where transmit antenna selection, receive beamforming design, reflection coefficient adjustment, power control, and access probability determination are jointly considered. 
To deal with this intractable problem, we first transform the objective function with the max-min form into an equivalent linear one, and then decompose the resulting problem into three sub-problems. Next, a block coordinate descent (BCD)-based greedy algorithm with a penalty function, successive convex approximation, and linear programming are designed to obtain sub-optimal solutions for tractable analysis. Simulation results demonstrate that the proposed algorithm outperforms benchmark algorithms in terms of transmission rate and fairness. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.12526v2-abstract-full').style.display = 'none'; document.getElementById('2209.12526v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2022. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2209.07063">arXiv:2209.07063</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2209.07063">pdf</a>, <a href="https://arxiv.org/format/2209.07063">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> </div> </div> <p class="title is-5 mathjax"> GAGA: Deciphering Age-path of Generalized Self-paced Regularizer </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Qu%2C+X">Xingyu Qu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Diyang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+X">Xiaohan Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+B">Bin Gu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2209.07063v2-abstract-short" style="display: inline;"> Nowadays self-paced learning (SPL) is an important machine learning paradigm that mimics the cognitive process of humans and animals. The SPL regime involves a self-paced regularizer and a gradually increasing age parameter, which plays a key role in SPL but where to optimally terminate this process is still non-trivial to determine. A natural idea is to compute the solution path w.r.t. 
age parame&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.07063v2-abstract-full').style.display = 'inline'; document.getElementById('2209.07063v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2209.07063v2-abstract-full" style="display: none;"> Nowadays self-paced learning (SPL) is an important machine learning paradigm that mimics the cognitive process of humans and animals. The SPL regime involves a self-paced regularizer and a gradually increasing age parameter, which plays a key role in SPL but where to optimally terminate this process is still non-trivial to determine. A natural idea is to compute the solution path w.r.t. age parameter (i.e., age-path). However, current age-path algorithms are either limited to the simplest regularizer, or lack solid theoretical understanding as well as computational efficiency. To address this challenge, we propose a novel \underline{G}eneralized \underline{Ag}e-path \underline{A}lgorithm (GAGA) for SPL with various self-paced regularizers based on ordinary differential equations (ODEs) and sets control, which can learn the entire solution spectrum w.r.t. a range of age parameters. To the best of our knowledge, GAGA is the first exact path-following algorithm tackling the age-path for general self-paced regularizer. Finally the algorithmic steps of classic SVM and Lasso are described in detail. We demonstrate the performance of GAGA on real-world datasets, and find considerable speedup between our algorithm and competing baselines. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.07063v2-abstract-full').style.display = 'none'; document.getElementById('2209.07063v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">33 pages. Published as a conference paper at NeurIPS 2022</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Gu%2C+B&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Gu%2C+B&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Gu%2C+B&amp;start=50" class="pagination-link " aria-label="Page 2">2 </a> </li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div 
class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" 
role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 
0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10