CINXE.COM
Search | arXiv e-print repository
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 63 results for author: <span class="mathjax">Lei, Q</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Lei%2C+Q">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Lei, Q"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Lei%2C+Q&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Lei, Q"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Lei%2C+Q&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Lei%2C+Q&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Lei%2C+Q&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.13954">arXiv:2502.13954</a> <span> [<a href="https://arxiv.org/pdf/2502.13954">pdf</a>, <a href="https://arxiv.org/format/2502.13954">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Latent Distribution Decoupling: A Probabilistic Framework for Uncertainty-Aware Multimodal Emotion Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+J">Jingwang Huang</a>, <a href="/search/cs?searchtype=author&query=Zhong%2C+J">Jiang Zhong</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qin Lei</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+J">Jinpeng Gao</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yuming Yang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Sirui Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+P">Peiguang Li</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+K">Kaiwen Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.13954v1-abstract-short" style="display: inline;"> Multimodal multi-label emotion recognition (MMER) aims to identify the concurrent presence of multiple emotions in multimodal data. Existing studies primarily focus on improving fusion strategies and modeling modality-to-label dependencies. However, they often overlook the impact of \textbf{aleatoric uncertainty}, which is the inherent noise in the multimodal data and hinders the effectiveness of… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13954v1-abstract-full').style.display = 'inline'; document.getElementById('2502.13954v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.13954v1-abstract-full" style="display: none;"> Multimodal multi-label emotion recognition (MMER) aims to identify the concurrent presence of multiple emotions in multimodal data. Existing studies primarily focus on improving fusion strategies and modeling modality-to-label dependencies. However, they often overlook the impact of \textbf{aleatoric uncertainty}, which is the inherent noise in the multimodal data and hinders the effectiveness of modality fusion by introducing ambiguity into feature representations. To address this issue and effectively model aleatoric uncertainty, this paper proposes Latent emotional Distribution Decomposition with Uncertainty perception (LDDU) framework from a novel perspective of latent emotional space probabilistic modeling. Specifically, we introduce a contrastive disentangled distribution mechanism within the emotion space to model the multimodal data, allowing for the extraction of semantic features and uncertainty. Furthermore, we design an uncertainty-aware fusion multimodal method that accounts for the dispersed distribution of uncertainty and integrates distribution information. Experimental results show that LDDU achieves state-of-the-art performance on the CMU-MOSEI and M$^3$ED datasets, highlighting the importance of uncertainty modeling in MMER. Code is available at https://github.com/201983290498/lddu\_mmer.git. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13954v1-abstract-full').style.display = 'none'; document.getElementById('2502.13954v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09850">arXiv:2502.09850</a> <span> [<a href="https://arxiv.org/pdf/2502.09850">pdf</a>, <a href="https://arxiv.org/format/2502.09850">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Elastic Representation: Mitigating Spurious Correlations for Group Robustness </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wen%2C+T">Tao Wen</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zihan Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Q">Quan Zhang</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qi Lei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09850v1-abstract-short" style="display: inline;"> Deep learning models can suffer from severe performance degradation when relying on spurious correlations between input features and labels, making the models perform well on training data but have poor prediction accuracy for minority groups. This problem arises especially when training data are limited or imbalanced. While most prior work focuses on learning invariant features (with consistent c… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09850v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09850v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09850v1-abstract-full" style="display: none;"> Deep learning models can suffer from severe performance degradation when relying on spurious correlations between input features and labels, making the models perform well on training data but have poor prediction accuracy for minority groups. This problem arises especially when training data are limited or imbalanced. While most prior work focuses on learning invariant features (with consistent correlations to y), it overlooks the potential harm of spurious correlations between features. We hereby propose Elastic Representation (ElRep) to learn features by imposing Nuclear- and Frobenius-norm penalties on the representation from the last layer of a neural network. Similar to the elastic net, ElRep enjoys the benefits of learning important features without losing feature diversity. The proposed method is simple yet effective. It can be integrated into many deep learning approaches to mitigate spurious correlations and improve group robustness. Moreover, we theoretically show that ElRep has minimum negative impacts on in-distribution predictions. This is a remarkable advantage over approaches that prioritize minority groups at the cost of overall performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09850v1-abstract-full').style.display = 'none'; document.getElementById('2502.09850v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at AISTATS 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08505">arXiv:2502.08505</a> <span> [<a href="https://arxiv.org/pdf/2502.08505">pdf</a>, <a href="https://arxiv.org/format/2502.08505">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Bridging Domain Adaptation and Graph Neural Networks: A Tensor-Based Framework for Effective Label Propagation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wen%2C+T">Tao Wen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+E">Elynn Chen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yuzhou Chen</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qi Lei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.08505v2-abstract-short" style="display: inline;"> Graph Neural Networks (GNNs) have recently become the predominant tools for studying graph data. Despite state-of-the-art performance on graph classification tasks, GNNs are overwhelmingly trained in a single domain under supervision, thus necessitating a prohibitively high demand for labels and resulting in poorly transferable representations. To address this challenge, we propose the Label-Propa… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08505v2-abstract-full').style.display = 'inline'; document.getElementById('2502.08505v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.08505v2-abstract-full" style="display: none;"> Graph Neural Networks (GNNs) have recently become the predominant tools for studying graph data. Despite state-of-the-art performance on graph classification tasks, GNNs are overwhelmingly trained in a single domain under supervision, thus necessitating a prohibitively high demand for labels and resulting in poorly transferable representations. To address this challenge, we propose the Label-Propagation Tensor Graph Neural Network (LP-TGNN) framework to bridge the gap between graph data and traditional domain adaptation methods. It extracts graph topological information holistically with a tensor architecture and then reduces domain discrepancy through label propagation. It is readily compatible with general GNNs and domain adaptation techniques with minimal adjustment through pseudo-labeling. Experiments on various real-world benchmarks show that our LP-TGNN outperforms baselines by a notable margin. We also validate and analyze each component of the proposed framework in the ablation study. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08505v2-abstract-full').style.display = 'none'; document.getElementById('2502.08505v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05075">arXiv:2502.05075</a> <span> [<a href="https://arxiv.org/pdf/2502.05075">pdf</a>, <a href="https://arxiv.org/format/2502.05075">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Numerical Analysis">math.NA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Discrepancies are Virtue: Weak-to-Strong Generalization through Lens of Intrinsic Dimension </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dong%2C+Y">Yijun Dong</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yicheng Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yunai Li</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+J+D">Jason D. Lee</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qi Lei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05075v1-abstract-short" style="display: inline;"> Weak-to-strong (W2S) generalization is a type of finetuning (FT) where a strong (large) student model is trained on pseudo-labels generated by a weak teacher. Surprisingly, W2S FT often outperforms the weak teacher. We seek to understand this phenomenon through the observation that FT often occurs in intrinsically low-dimensional spaces. Leveraging the low intrinsic dimensionality of FT, we analyz… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05075v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05075v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05075v1-abstract-full" style="display: none;"> Weak-to-strong (W2S) generalization is a type of finetuning (FT) where a strong (large) student model is trained on pseudo-labels generated by a weak teacher. Surprisingly, W2S FT often outperforms the weak teacher. We seek to understand this phenomenon through the observation that FT often occurs in intrinsically low-dimensional spaces. Leveraging the low intrinsic dimensionality of FT, we analyze W2S in the ridgeless regression setting from a variance reduction perspective. For a strong student - weak teacher pair with sufficiently expressive low-dimensional feature subspaces $\mathcal{V}_s, \mathcal{V}_w$, we provide an exact characterization of the variance that dominates the generalization error of W2S. This unveils a virtue of discrepancy between the strong and weak models in W2S: the variance of the weak teacher is inherited by the strong student in $\mathcal{V}_s \cap \mathcal{V}_w$, while reduced by a factor of $\dim(\mathcal{V}_s)/N$ in the subspace of discrepancy $\mathcal{V}_w \setminus \mathcal{V}_s$ with $N$ pseudo-labels for W2S. Further, our analysis casts light on the sample complexities and the scaling of performance gap recovery in W2S. The analysis is supported with experiments on both synthetic regression problems and real vision tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05075v1-abstract-full').style.display = 'none'; document.getElementById('2502.05075v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.03593">arXiv:2412.03593</a> <span> [<a href="https://arxiv.org/pdf/2412.03593">pdf</a>, <a href="https://arxiv.org/format/2412.03593">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> CovidLLM: A Robust Large Language Model with Missing Value Adaptation and Multi-Objective Learning Strategy for Predicting Disease Severity and Clinical Outcomes in COVID-19 Patients </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhu%2C+S">Shengjun Zhu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+S">Siyu Liu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yang Li</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qing Lei</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+H">Hongyan Hou</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+H">Hewei Jiang</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+S">Shujuan Guo</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+F">Feng Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+R">Rongshang Chen</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+X">Xionglin Fan</a>, <a href="/search/cs?searchtype=author&query=Tao%2C+S">Shengce Tao</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+J">Jiaxin Cai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.03593v1-abstract-short" style="display: inline;"> Coronavirus Disease 2019 (COVID-19), which emerged in 2019, has caused millions of deaths worldwide. Although effective vaccines have been developed to mitigate severe symptoms, certain populations, particularly the elderly and those with comorbidities, remain at high risk for severe outcomes and increased mortality. Consequently, early identification of the severity and clinical outcomes of the d… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.03593v1-abstract-full').style.display = 'inline'; document.getElementById('2412.03593v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.03593v1-abstract-full" style="display: none;"> Coronavirus Disease 2019 (COVID-19), which emerged in 2019, has caused millions of deaths worldwide. Although effective vaccines have been developed to mitigate severe symptoms, certain populations, particularly the elderly and those with comorbidities, remain at high risk for severe outcomes and increased mortality. Consequently, early identification of the severity and clinical outcomes of the disease in these patients is vital to prevent adverse prognoses. Although traditional machine learning and deep learning models have been widely employed in this area, the potential of large language models (LLMs) remains largely unexplored. Our research focuses primarily on constructing specialized prompts and adopting multi-objective learning strategies. We started by selecting serological indicators that significantly correlate with clinical outcomes and disease severity to serve as input data for the model. Blood test samples often contain numerous missing values, and traditional models generally rely on imputation to handle these gaps in the data. In contrast, LLMs offer the advantage of robust semantic understanding. By setting prompts, we can explicitly inform the model when a feature's value is missing, without the need for imputation. For the multi-objective learning strategy, the model is designed to first predict disease severity and then predict clinical outcomes. Given that LLMs utilize both the input text and the generated tokens as input for generating the next token, the predicted severity is used as a basis for generating the clinical outcome. During the fine-tuning of the LLM, the two objectives influence and improve each other. Our experiments were implemented based on the ChatGLM model. The results demonstrate the effectiveness of LLMs in this task, suggesting promising potential for further development. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.03593v1-abstract-full').style.display = 'none'; document.getElementById('2412.03593v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.15583">arXiv:2411.15583</a> <span> [<a href="https://arxiv.org/pdf/2411.15583">pdf</a>, <a href="https://arxiv.org/format/2411.15583">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Exploring Viewing Modalities in Cinematic Virtual Reality: A Systematic Review and Meta-Analysis of Challenges in Evaluating User Experience </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yawen Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+H">Han Zhou</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Z">Zhoumingju Jiang</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+Z">Zilu Tang</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+T">Tao Luo</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qinyuan Lei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.15583v1-abstract-short" style="display: inline;"> Cinematic Virtual Reality (CVR) is a narrative-driven VR experience that uses head-mounted displays with a 360-degree field of view. Previous research has explored different viewing modalities to enhance viewers' CVR experience. This study conducted a systematic review and meta-analysis focusing on how different viewing modalities, including intervened rotation, avatar assistance, guidance cues, a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.15583v1-abstract-full').style.display = 'inline'; document.getElementById('2411.15583v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.15583v1-abstract-full" style="display: none;"> Cinematic Virtual Reality (CVR) is a narrative-driven VR experience that uses head-mounted displays with a 360-degree field of view. Previous research has explored different viewing modalities to enhance viewers' CVR experience. This study conducted a systematic review and meta-analysis focusing on how different viewing modalities, including intervened rotation, avatar assistance, guidance cues, and perspective shifting, influence the CVR experience. The study has screened 3444 papers (between 01/01/2013 and 17/06/2023) and selected 45 for systematic review, 13 of which also for meta-analysis. We conducted separate random-effects meta-analysis and applied Robust Variance Estimation to examine CVR viewing modalities and user experience outcomes. Evidence from experiments was synthesized as differences between standardized mean differences (SMDs) of user experience of control group ("Swivel-Chair" CVR) and experiment groups. To our surprise, we found inconsistencies in the effect sizes across different studies, even with the same viewing modalities. Moreover, in these studies, terms such as "presence," "immersion," and "narrative engagement" were often used interchangeably. Their irregular use of questionnaires, overreliance on self-developed questionnaires, and incomplete data reporting may have led to unrigorous evaluations of CVR experiences. This study contributes to Human-Computer Interaction (HCI) research by identifying gaps in CVR research, emphasizing the need for standardization of terminologies and methodologies to enhance the reliability and comparability of future CVR research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.15583v1-abstract-full').style.display = 'none'; document.getElementById('2411.15583v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">29 pages, recommend for acceptance by CSCW</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.14086">arXiv:2411.14086</a> <span> [<a href="https://arxiv.org/pdf/2411.14086">pdf</a>, <a href="https://arxiv.org/format/2411.14086">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Path Tracking Hybrid A* For Autonomous Agricultural Vehicles </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lu%2C+M">Mingke Lu</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+H">Han Gao</a>, <a href="/search/cs?searchtype=author&query=Dai%2C+H">Haijie Dai</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qianli Lei</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+C">Chang Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.14086v1-abstract-short" style="display: inline;"> We propose a path-tracking Hybrid A* planner and a coupled hierarchical Model Predictive Control (MPC) controller in scenarios involving the path smoothing of agricultural vehicles. For agricultural vehicles following reference paths on farmlands, especially during cross-furrow operations, a minimum deviation from the reference path is desired, in addition to the curvature constraints and body sca… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14086v1-abstract-full').style.display = 'inline'; document.getElementById('2411.14086v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.14086v1-abstract-full" style="display: none;"> We propose a path-tracking Hybrid A* planner and a coupled hierarchical Model Predictive Control (MPC) controller in scenarios involving the path smoothing of agricultural vehicles. For agricultural vehicles following reference paths on farmlands, especially during cross-furrow operations, a minimum deviation from the reference path is desired, in addition to the curvature constraints and body scale collision avoidance. Our contribution is threefold. (1) We propose the path-tracking Hybrid A*, which satisfies nonholonomic constraints and vehicle size collision avoidance, and devise new cost and heuristic functions to minimize the deviation degree. The path-tracking Hybrid A* can not only function in offline smoothing but also the real-time adjustment when confronted with unexpected obstacles. (2) We propose the hierarchical MPC to safely track the smoothed trajectory, using the initial solution solved by linearized MPC and nonlinear local adjustments around the initial solution. (3) We carry out extensive simulations with baseline comparisons based on real-world farm datasets to evaluate the performance of our algorithm. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14086v1-abstract-full').style.display = 'none'; document.getElementById('2411.14086v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.03746">arXiv:2411.03746</a> <span> [<a href="https://arxiv.org/pdf/2411.03746">pdf</a>, <a href="https://arxiv.org/format/2411.03746">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Optimal Defenses Against Gradient Reconstruction Attacks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yuxiao Chen</a>, <a href="/search/cs?searchtype=author&query=G%C3%BCrsoy%2C+G">Gamze G眉rsoy</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qi Lei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.03746v1-abstract-short" style="display: inline;"> Federated Learning (FL) is designed to prevent data leakage through collaborative model training without centralized data storage. However, it remains vulnerable to gradient reconstruction attacks that recover original training data from shared gradients. To optimize the trade-off between data leakage and utility loss, we first derive a theoretical lower bound of reconstruction error (among all at… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03746v1-abstract-full').style.display = 'inline'; document.getElementById('2411.03746v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.03746v1-abstract-full" style="display: none;"> Federated Learning (FL) is designed to prevent data leakage through collaborative model training without centralized data storage. However, it remains vulnerable to gradient reconstruction attacks that recover original training data from shared gradients. To optimize the trade-off between data leakage and utility loss, we first derive a theoretical lower bound of reconstruction error (among all attackers) for the two standard methods: adding noise, and gradient pruning. We then customize these two defenses to be parameter- and model-specific and achieve the optimal trade-off between our obtained reconstruction lower bound and model utility. Experimental results validate that our methods outperform Gradient Noise and Gradient Pruning by protecting the training data better while also achieving better utility. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03746v1-abstract-full').style.display = 'none'; document.getElementById('2411.03746v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The code for this project is available at https://github.com/cyx78/Optimal_Defenses_Against_Gradient_Reconstruction_Attacks</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.23904">arXiv:2410.23904</a> <span> [<a href="https://arxiv.org/pdf/2410.23904">pdf</a>, <a href="https://arxiv.org/format/2410.23904">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> EZ-HOI: VLM Adaptation via Guided Prompt Learning for Zero-Shot HOI Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qinqian Lei</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+B">Bo Wang</a>, <a href="/search/cs?searchtype=author&query=Tan%2C+R+T">Robby T. Tan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.23904v3-abstract-short" style="display: inline;"> Detecting Human-Object Interactions (HOI) in zero-shot settings, where models must handle unseen classes, poses significant challenges. Existing methods that rely on aligning visual encoders with large Vision-Language Models (VLMs) to tap into the extensive knowledge of VLMs, require large, computationally expensive models and encounter training difficulties. Adapting VLMs with prompt learning off… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.23904v3-abstract-full').style.display = 'inline'; document.getElementById('2410.23904v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.23904v3-abstract-full" style="display: none;"> Detecting Human-Object Interactions (HOI) in zero-shot settings, where models must handle unseen classes, poses significant challenges. Existing methods that rely on aligning visual encoders with large Vision-Language Models (VLMs) to tap into the extensive knowledge of VLMs, require large, computationally expensive models and encounter training difficulties. Adapting VLMs with prompt learning offers an alternative to direct alignment. However, fine-tuning on task-specific datasets often leads to overfitting to seen classes and suboptimal performance on unseen classes, due to the absence of unseen class labels. To address these challenges, we introduce a novel prompt learning-based framework for Efficient Zero-Shot HOI detection (EZ-HOI). First, we introduce Large Language Model (LLM) and VLM guidance for learnable prompts, integrating detailed HOI descriptions and visual semantics to adapt VLMs to HOI tasks. However, because training datasets contain seen-class labels alone, fine-tuning VLMs on such datasets tends to optimize learnable prompts for seen classes instead of unseen ones. Therefore, we design prompt learning for unseen classes using information from related seen classes, with LLMs utilized to highlight the differences between unseen and related seen classes. Quantitative evaluations on benchmark datasets demonstrate that our EZ-HOI achieves state-of-the-art performance across various zero-shot settings with only 10.35% to 33.95% of the trainable parameters compared to existing methods. Code is available at https://github.com/ChelsieLei/EZ-HOI. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.23904v3-abstract-full').style.display = 'none'; document.getElementById('2410.23904v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by NeurIPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.21331">arXiv:2410.21331</a> <span> [<a href="https://arxiv.org/pdf/2410.21331">pdf</a>, <a href="https://arxiv.org/format/2410.21331">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Beyond Interpretability: The Gains of Feature Monosemanticity on Model Robustness </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Q">Qi Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yifei Wang</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+J">Jingyi Cui</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+X">Xiang Pan</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qi Lei</a>, <a href="/search/cs?searchtype=author&query=Jegelka%2C+S">Stefanie Jegelka</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yisen Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.21331v1-abstract-short" style="display: inline;"> Deep learning models often suffer from a lack of interpretability due to polysemanticity, where individual neurons are activated by multiple unrelated semantics, resulting in unclear attributions of model behavior. Recent advances in monosemanticity, where neurons correspond to consistent and distinct semantics, have significantly improved interpretability but are commonly believed to compromise a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.21331v1-abstract-full').style.display = 'inline'; document.getElementById('2410.21331v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.21331v1-abstract-full" style="display: none;"> Deep learning models often suffer from a lack of interpretability due to polysemanticity, where individual neurons are activated by multiple unrelated semantics, resulting in unclear attributions of model behavior. Recent advances in monosemanticity, where neurons correspond to consistent and distinct semantics, have significantly improved interpretability but are commonly believed to compromise accuracy. In this work, we challenge the prevailing belief of the accuracy-interpretability tradeoff, showing that monosemantic features not only enhance interpretability but also bring concrete gains in model performance. Across multiple robust learning scenarios-including input and label noise, few-shot learning, and out-of-domain generalization-our results show that models leveraging monosemantic features significantly outperform those relying on polysemantic features. Furthermore, we provide empirical and theoretical understandings on the robustness gains of feature monosemanticity. Our preliminary analysis suggests that monosemanticity, by promoting better separation of feature representations, leads to more robust decision boundaries. This diverse evidence highlights the generality of monosemanticity in improving model robustness. As a first step in this new direction, we embark on exploring the learning benefits of monosemanticity beyond interpretability, supporting the long-standing hypothesis of linking interpretability and robustness. Code is available at \url{https://github.com/PKU-ML/Beyond_Interpretability}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.21331v1-abstract-full').style.display = 'none'; document.getElementById('2410.21331v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.19126">arXiv:2407.19126</a> <span> [<a href="https://arxiv.org/pdf/2407.19126">pdf</a>, <a href="https://arxiv.org/format/2407.19126">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Greedy Output Approximation: Towards Efficient Structured Pruning for LLMs Without Retraining </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+J">Jianwei Li</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+Y">Yijun Dong</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qi Lei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.19126v1-abstract-short" style="display: inline;"> To remove redundant components of large language models (LLMs) without incurring significant computational costs, this work focuses on single-shot pruning without a retraining phase. We simplify the pruning process for Transformer-based LLMs by identifying a depth-2 pruning structure that functions independently. Additionally, we propose two inference-aware pruning criteria derived from the optimi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.19126v1-abstract-full').style.display = 'inline'; document.getElementById('2407.19126v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.19126v1-abstract-full" style="display: none;"> To remove redundant components of large language models (LLMs) without incurring significant computational costs, this work focuses on single-shot pruning without a retraining phase. We simplify the pruning process for Transformer-based LLMs by identifying a depth-2 pruning structure that functions independently. Additionally, we propose two inference-aware pruning criteria derived from the optimization perspective of output approximation, which outperforms traditional training-aware metrics such as gradient and Hessian. We also introduce a two-step reconstruction technique to mitigate pruning errors without model retraining. Experimental results demonstrate that our approach significantly reduces computational costs and hardware requirements while maintaining superior performance across various datasets and models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.19126v1-abstract-full').style.display = 'none'; document.getElementById('2407.19126v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.08209">arXiv:2407.08209</a> <span> [<a href="https://arxiv.org/pdf/2407.08209">pdf</a>, <a href="https://arxiv.org/format/2407.08209">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Enriching Information and Preserving Semantic Consistency in Expanding Curvilinear Object Segmentation Datasets </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qin Lei</a>, <a href="/search/cs?searchtype=author&query=Zhong%2C+J">Jiang Zhong</a>, <a href="/search/cs?searchtype=author&query=Dai%2C+Q">Qizhu Dai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.08209v1-abstract-short" style="display: inline;"> Curvilinear object segmentation plays a crucial role across various applications, yet datasets in this domain often suffer from small scale due to the high costs associated with data acquisition and annotation. To address these challenges, this paper introduces a novel approach for expanding curvilinear object segmentation datasets, focusing on enhancing the informativeness of generated data and t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.08209v1-abstract-full').style.display = 'inline'; document.getElementById('2407.08209v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.08209v1-abstract-full" style="display: none;"> Curvilinear object segmentation plays a crucial role across various applications, yet datasets in this domain often suffer from small scale due to the high costs associated with data acquisition and annotation. To address these challenges, this paper introduces a novel approach for expanding curvilinear object segmentation datasets, focusing on enhancing the informativeness of generated data and the consistency between semantic maps and generated images. Our method enriches synthetic data informativeness by generating curvilinear objects through their multiple textual features. By combining textual features from each sample in original dataset, we obtain synthetic images that beyond the original dataset's distribution. This initiative necessitated the creation of the Curvilinear Object Segmentation based on Text Generation (COSTG) dataset. Designed to surpass the limitations of conventional datasets, COSTG incorporates not only standard semantic maps but also some textual descriptions of curvilinear object features. To ensure consistency between synthetic semantic maps and images, we introduce the Semantic Consistency Preserving ControlNet (SCP ControlNet). This involves an adaptation of ControlNet with Spatially-Adaptive Normalization (SPADE), allowing it to preserve semantic information that would typically be washed away in normalization layers. This modification facilitates more accurate semantic image synthesis. Experimental results demonstrate the efficacy of our approach across three types of curvilinear objects (angiography, crack and retina) and six public datasets (CHUAC, XCAD, DCA1, DRIVE, CHASEDB1 and Crack500). The synthetic data generated by our method not only expand the dataset, but also effectively improves the performance of other curvilinear object segmentation models. Source code and dataset are available at \url{https://github.com/tanlei0/COSTG}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.08209v1-abstract-full').style.display = 'none'; document.getElementById('2407.08209v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ECCV 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.06120">arXiv:2407.06120</a> <span> [<a href="https://arxiv.org/pdf/2407.06120">pdf</a>, <a href="https://arxiv.org/format/2407.06120">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Numerical Analysis">math.NA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Sketchy Moment Matching: Toward Fast and Provable Data Selection for Finetuning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dong%2C+Y">Yijun Dong</a>, <a href="/search/cs?searchtype=author&query=Phan%2C+H">Hoang Phan</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+X">Xiang Pan</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qi Lei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.06120v3-abstract-short" style="display: inline;"> We revisit data selection in a modern context of finetuning from a fundamental perspective. Extending the classical wisdom of variance minimization in low dimensions to high-dimensional finetuning, our generalization analysis unveils the importance of additionally reducing bias induced by low-rank approximation. Inspired by the variance-bias tradeoff in high dimensions from the theory, we introduc… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.06120v3-abstract-full').style.display = 'inline'; document.getElementById('2407.06120v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.06120v3-abstract-full" style="display: none;"> We revisit data selection in a modern context of finetuning from a fundamental perspective. Extending the classical wisdom of variance minimization in low dimensions to high-dimensional finetuning, our generalization analysis unveils the importance of additionally reducing bias induced by low-rank approximation. Inspired by the variance-bias tradeoff in high dimensions from the theory, we introduce Sketchy Moment Matching (SkMM), a scalable data selection scheme with two stages. (i) First, the bias is controlled using gradient sketching that explores the finetuning parameter space for an informative low-dimensional subspace $\mathcal{S}$; (ii) then the variance is reduced over $\mathcal{S}$ via moment matching between the original and selected datasets. Theoretically, we show that gradient sketching is fast and provably accurate: selecting $n$ samples by reducing variance over $\mathcal{S}$ preserves the fast-rate generalization $O(\dim(\mathcal{S})/n)$, independent of the parameter dimension. Empirically, we concretize the variance-bias balance via synthetic experiments and demonstrate the effectiveness of SkMM for finetuning in real vision tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.06120v3-abstract-full').style.display = 'none'; document.getElementById('2407.06120v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.19617">arXiv:2406.19617</a> <span> [<a href="https://arxiv.org/pdf/2406.19617">pdf</a>, <a href="https://arxiv.org/ps/2406.19617">ps</a>, <a href="https://arxiv.org/format/2406.19617">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> </div> </div> <p class="title is-5 mathjax"> Stochastic Zeroth-Order Optimization under Strongly Convexity and Lipschitz Hessian: Minimax Sample Complexity </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yu%2C+Q">Qian Yu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yining Wang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+B">Baihe Huang</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qi Lei</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+J+D">Jason D. Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.19617v1-abstract-short" style="display: inline;"> Optimization of convex functions under stochastic zeroth-order feedback has been a major and challenging question in online learning. In this work, we consider the problem of optimizing second-order smooth and strongly convex functions where the algorithm is only accessible to noisy evaluations of the objective function it queries. We provide the first tight characterization for the rate of the mi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.19617v1-abstract-full').style.display = 'inline'; document.getElementById('2406.19617v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.19617v1-abstract-full" style="display: none;"> Optimization of convex functions under stochastic zeroth-order feedback has been a major and challenging question in online learning. In this work, we consider the problem of optimizing second-order smooth and strongly convex functions where the algorithm is only accessible to noisy evaluations of the objective function it queries. We provide the first tight characterization for the rate of the minimax simple regret by developing matching upper and lower bounds. We propose an algorithm that features a combination of a bootstrapping stage and a mirror-descent stage. Our main technical innovation consists of a sharp characterization for the spherical-sampling gradient estimator under higher-order smoothness conditions, which allows the algorithm to optimally balance the bias-variance tradeoff, and a new iterative method for the bootstrapping stage, which maintains the performance for unbounded Hessian. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.19617v1-abstract-full').style.display = 'none'; document.getElementById('2406.19617v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.09164">arXiv:2403.09164</a> <span> [<a href="https://arxiv.org/pdf/2403.09164">pdf</a>, <a href="https://arxiv.org/format/2403.09164">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> </div> </div> <p class="title is-5 mathjax"> Exploring the Comprehension of ChatGPT in Traditional Chinese Medicine Knowledge </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yizhen%2C+L">Li Yizhen</a>, <a href="/search/cs?searchtype=author&query=Shaohan%2C+H">Huang Shaohan</a>, <a href="/search/cs?searchtype=author&query=Jiaxing%2C+Q">Qi Jiaxing</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Quan Lei</a>, <a href="/search/cs?searchtype=author&query=Dongran%2C+H">Han Dongran</a>, <a href="/search/cs?searchtype=author&query=Zhongzhi%2C+L">Luan Zhongzhi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.09164v1-abstract-short" style="display: inline;"> No previous work has studied the performance of Large Language Models (LLMs) in the context of Traditional Chinese Medicine (TCM), an essential and distinct branch of medical knowledge with a rich history. To bridge this gap, we present a TCM question dataset named TCM-QA, which comprises three question types: single choice, multiple choice, and true or false, to examine the LLM's capacity for kno… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.09164v1-abstract-full').style.display = 'inline'; document.getElementById('2403.09164v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.09164v1-abstract-full" style="display: none;"> No previous work has studied the performance of Large Language Models (LLMs) in the context of Traditional Chinese Medicine (TCM), an essential and distinct branch of medical knowledge with a rich history. To bridge this gap, we present a TCM question dataset named TCM-QA, which comprises three question types: single choice, multiple choice, and true or false, to examine the LLM's capacity for knowledge recall and comprehensive reasoning within the TCM domain. In our study, we evaluate two settings of the LLM, zero-shot and few-shot settings, while concurrently discussing the differences between English and Chinese prompts. Our results indicate that ChatGPT performs best in true or false questions, achieving the highest precision of 0.688 while scoring the lowest precision is 0.241 in multiple-choice questions. Furthermore, we observed that Chinese prompts outperformed English prompts in our evaluations. Additionally, we assess the quality of explanations generated by ChatGPT and their potential contribution to TCM knowledge comprehension. This paper offers valuable insights into the applicability of LLMs in specialized domains and paves the way for future research in leveraging these powerful models to advance TCM. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.09164v1-abstract-full').style.display = 'none'; document.getElementById('2403.09164v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.06424">arXiv:2403.06424</a> <span> [<a href="https://arxiv.org/pdf/2403.06424">pdf</a>, <a href="https://arxiv.org/format/2403.06424">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Bridging Domains with Approximately Shared Features </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhong%2C+Z+S">Ziliang Samuel Zhong</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+X">Xiang Pan</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qi Lei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.06424v1-abstract-short" style="display: inline;"> Multi-source domain adaptation aims to reduce performance degradation when applying machine learning models to unseen domains. A fundamental challenge is devising the optimal strategy for feature selection. Existing literature is somewhat paradoxical: some advocate for learning invariant features from source domains, while others favor more diverse features. To address the challenge, we propose a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.06424v1-abstract-full').style.display = 'inline'; document.getElementById('2403.06424v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.06424v1-abstract-full" style="display: none;"> Multi-source domain adaptation aims to reduce performance degradation when applying machine learning models to unseen domains. A fundamental challenge is devising the optimal strategy for feature selection. Existing literature is somewhat paradoxical: some advocate for learning invariant features from source domains, while others favor more diverse features. To address the challenge, we propose a statistical framework that distinguishes the utilities of features based on the variance of their correlation to label $y$ across domains. Under our framework, we design and analyze a learning procedure consisting of learning approximately shared feature representation from source tasks and fine-tuning it on the target task. Our theoretical analysis necessitates the importance of learning approximately shared features instead of only the strictly invariant features and yields an improved population risk compared to previous results on both source and target tasks, thus partly resolving the paradox mentioned above. Inspired by our theory, we proposed a more practical way to isolate the content (invariant+approximately shared) from environmental features and further consolidate our theoretical findings. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.06424v1-abstract-full').style.display = 'none'; document.getElementById('2403.06424v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.02695">arXiv:2403.02695</a> <span> [<a href="https://arxiv.org/pdf/2403.02695">pdf</a>, <a href="https://arxiv.org/format/2403.02695">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Controllable Prompt Tuning For Balancing Group Distributional Robustness </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Phan%2C+H">Hoang Phan</a>, <a href="/search/cs?searchtype=author&query=Wilson%2C+A+G">Andrew Gordon Wilson</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qi Lei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.02695v2-abstract-short" style="display: inline;"> Models trained on data composed of different groups or domains can suffer from severe performance degradation under distribution shifts. While recent methods have largely focused on optimizing the worst-group objective, this often comes at the expense of good performance on other groups. To address this problem, we introduce an optimization scheme to achieve good performance across groups and find… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.02695v2-abstract-full').style.display = 'inline'; document.getElementById('2403.02695v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.02695v2-abstract-full" style="display: none;"> Models trained on data composed of different groups or domains can suffer from severe performance degradation under distribution shifts. While recent methods have largely focused on optimizing the worst-group objective, this often comes at the expense of good performance on other groups. To address this problem, we introduce an optimization scheme to achieve good performance across groups and find a good solution for all without severely sacrificing performance on any of them. However, directly applying such optimization involves updating the parameters of the entire network, making it both computationally expensive and challenging. Thus, we introduce Controllable Prompt Tuning (CPT), which couples our approach with prompt-tuning techniques. On spurious correlation benchmarks, our procedures achieve state-of-the-art results across both transformer and non-transformer architectures, as well as unimodal and multimodal data, while requiring only 0.4% tunable parameters. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.02695v2-abstract-full').style.display = 'none'; document.getElementById('2403.02695v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Proceedings of the 41st International Conference on Machine Learning</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.09478">arXiv:2402.09478</a> <span> [<a href="https://arxiv.org/pdf/2402.09478">pdf</a>, <a href="https://arxiv.org/format/2402.09478">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Data Reconstruction Attacks and Defenses: A Systematic Evaluation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+S">Sheng Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zihan Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yuxiao Chen</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qi Lei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.09478v2-abstract-short" style="display: inline;"> Reconstruction attacks and defenses are essential in understanding the data leakage problem in machine learning. However, prior work has centered around empirical observations of gradient inversion attacks, lacks theoretical justifications, and cannot disentangle the usefulness of defending methods from the computational limitation of attacking methods. In this work, we propose to view the problem… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.09478v2-abstract-full').style.display = 'inline'; document.getElementById('2402.09478v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.09478v2-abstract-full" style="display: none;"> Reconstruction attacks and defenses are essential in understanding the data leakage problem in machine learning. However, prior work has centered around empirical observations of gradient inversion attacks, lacks theoretical justifications, and cannot disentangle the usefulness of defending methods from the computational limitation of attacking methods. In this work, we propose to view the problem as an inverse problem, enabling us to theoretically, quantitatively, and systematically evaluate the data reconstruction problem. On various defense methods, we derived the algorithmic upper bound and the matching (in feature dimension and model width) information-theoretical lower bound on the reconstruction error for two-layer neural networks. To complement the theoretical results and investigate the utility-privacy trade-off, we defined a natural evaluation metric of the defense methods with similar utility loss among the strongest attacks. We further propose a strong reconstruction attack that helps update some previous understanding of the strength of defense methods under our proposed evaluation metric. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.09478v2-abstract-full').style.display = 'none'; document.getElementById('2402.09478v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.15530">arXiv:2401.15530</a> <span> [<a href="https://arxiv.org/pdf/2401.15530">pdf</a>, <a href="https://arxiv.org/ps/2401.15530">ps</a>, <a href="https://arxiv.org/format/2401.15530">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> An Information-Theoretic Analysis of In-Context Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jeon%2C+H+J">Hong Jun Jeon</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+J+D">Jason D. Lee</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qi Lei</a>, <a href="/search/cs?searchtype=author&query=Van+Roy%2C+B">Benjamin Van Roy</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.15530v1-abstract-short" style="display: inline;"> Previous theoretical results pertaining to meta-learning on sequences build on contrived assumptions and are somewhat convoluted. We introduce new information-theoretic tools that lead to an elegant and very general decomposition of error into three components: irreducible error, meta-learning error, and intra-task error. These tools unify analyses across many meta-learning challenges. To illustra… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.15530v1-abstract-full').style.display = 'inline'; document.getElementById('2401.15530v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.15530v1-abstract-full" style="display: none;"> Previous theoretical results pertaining to meta-learning on sequences build on contrived assumptions and are somewhat convoluted. We introduce new information-theoretic tools that lead to an elegant and very general decomposition of error into three components: irreducible error, meta-learning error, and intra-task error. These tools unify analyses across many meta-learning challenges. To illustrate, we apply them to establish new results about in-context learning with transformers. Our theoretical results characterizes how error decays in both the number of training sequences and sequence lengths. Our results are very general; for example, they avoid contrived mixing time assumptions made by all prior results that establish decay of error with sequence length. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.15530v1-abstract-full').style.display = 'none'; document.getElementById('2401.15530v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.10586">arXiv:2312.10586</a> <span> [<a href="https://arxiv.org/pdf/2312.10586">pdf</a>, <a href="https://arxiv.org/format/2312.10586">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Few-Shot Learning from Augmented Label-Uncertain Queries in Bongard-HOI </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qinqian Lei</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+B">Bo Wang</a>, <a href="/search/cs?searchtype=author&query=Tan%2C+R+T">Robby T. Tan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.10586v1-abstract-short" style="display: inline;"> Detecting human-object interactions (HOI) in a few-shot setting remains a challenge. Existing meta-learning methods struggle to extract representative features for classification due to the limited data, while existing few-shot HOI models rely on HOI text labels for classification. Moreover, some query images may display visual similarity to those outside their class, such as similar backgrounds b… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.10586v1-abstract-full').style.display = 'inline'; document.getElementById('2312.10586v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.10586v1-abstract-full" style="display: none;"> Detecting human-object interactions (HOI) in a few-shot setting remains a challenge. Existing meta-learning methods struggle to extract representative features for classification due to the limited data, while existing few-shot HOI models rely on HOI text labels for classification. Moreover, some query images may display visual similarity to those outside their class, such as similar backgrounds between different HOI classes. This makes learning more challenging, especially with limited samples. Bongard-HOI (Jiang et al. 2022) epitomizes this HOI few-shot problem, making it the benchmark we focus on in this paper. In our proposed method, we introduce novel label-uncertain query augmentation techniques to enhance the diversity of the query inputs, aiming to distinguish the positive HOI class from the negative ones. As these augmented inputs may or may not have the same class label as the original inputs, their class label is unknown. Those belonging to a different class become hard samples due to their visual similarity to the original ones. Additionally, we introduce a novel pseudo-label generation technique that enables a mean teacher model to learn from the augmented label-uncertain inputs. We propose to augment the negative support set for the student model to enrich the semantic information, fostering diversity that challenges and enhances the student's learning. Experimental results demonstrate that our method sets a new state-of-the-art (SOTA) performance by achieving 68.74% accuracy on the Bongard-HOI benchmark, a significant improvement over the existing SOTA of 66.59%. In our evaluation on HICO-FS, a more general few-shot recognition dataset, our method achieves 73.27% accuracy, outperforming the previous SOTA of 71.20% in the 5-way 5-shot task. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.10586v1-abstract-full').style.display = 'none'; document.getElementById('2312.10586v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.05720">arXiv:2312.05720</a> <span> [<a href="https://arxiv.org/pdf/2312.05720">pdf</a>, <a href="https://arxiv.org/format/2312.05720">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Beyond Gradient and Priors in Privacy Attacks: Leveraging Pooler Layer Inputs of Language Models in Federated Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+J">Jianwei Li</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+S">Sheng Liu</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qi Lei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.05720v4-abstract-short" style="display: inline;"> Language models trained via federated learning (FL) demonstrate impressive capabilities in handling complex tasks while protecting user privacy. Recent studies indicate that leveraging gradient information and prior knowledge can potentially reveal training samples within FL setting. However, these investigations have overlooked the potential privacy risks tied to the intrinsic architecture of the… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.05720v4-abstract-full').style.display = 'inline'; document.getElementById('2312.05720v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.05720v4-abstract-full" style="display: none;"> Language models trained via federated learning (FL) demonstrate impressive capabilities in handling complex tasks while protecting user privacy. Recent studies indicate that leveraging gradient information and prior knowledge can potentially reveal training samples within FL setting. However, these investigations have overlooked the potential privacy risks tied to the intrinsic architecture of the models. This paper presents a two-stage privacy attack strategy that targets the vulnerabilities in the architecture of contemporary language models, significantly enhancing attack performance by initially recovering certain feature directions as additional supervisory signals. Our comparative experiments demonstrate superior attack performance across various datasets and scenarios, highlighting the privacy leakage risk associated with the increasingly complex architectures of language models. We call for the community to recognize and address these potential privacy risks in designing large language models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.05720v4-abstract-full').style.display = 'none'; document.getElementById('2312.05720v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.13191">arXiv:2310.13191</a> <span> [<a href="https://arxiv.org/pdf/2310.13191">pdf</a>, <a href="https://arxiv.org/format/2310.13191">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Towards Robust Pruning: An Adaptive Knowledge-Retention Pruning Strategy for Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+J">Jianwei Li</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qi Lei</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+W">Wei Cheng</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+D">Dongkuan Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.13191v3-abstract-short" style="display: inline;"> The pruning objective has recently extended beyond accuracy and sparsity to robustness in language models. Despite this, existing methods struggle to enhance robustness against adversarial attacks when continually increasing model sparsity and require a retraining process. As humans step into the era of large language models, these issues become increasingly prominent. This paper proposes that the… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.13191v3-abstract-full').style.display = 'inline'; document.getElementById('2310.13191v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.13191v3-abstract-full" style="display: none;"> The pruning objective has recently extended beyond accuracy and sparsity to robustness in language models. Despite this, existing methods struggle to enhance robustness against adversarial attacks when continually increasing model sparsity and require a retraining process. As humans step into the era of large language models, these issues become increasingly prominent. This paper proposes that the robustness of language models is proportional to the extent of pre-trained knowledge they encompass. Accordingly, we introduce a post-training pruning strategy designed to faithfully replicate the embedding space and feature space of dense language models, aiming to conserve more pre-trained knowledge during the pruning process. In this setup, each layer's reconstruction error not only originates from itself but also includes cumulative error from preceding layers, followed by an adaptive rectification. Compared to other state-of-art baselines, our approach demonstrates a superior balance between accuracy, sparsity, robustness, and pruning cost with BERT on datasets SST2, IMDB, and AGNews, marking a significant stride towards robust pruning in language models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.13191v3-abstract-full').style.display = 'none'; document.getElementById('2310.13191v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.13183">arXiv:2310.13183</a> <span> [<a href="https://arxiv.org/pdf/2310.13183">pdf</a>, <a href="https://arxiv.org/format/2310.13183">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Breaking through Deterministic Barriers: Randomized Pruning Mask Generation and Selection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+J">Jianwei Li</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+W">Weizhi Gao</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qi Lei</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+D">Dongkuan Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.13183v2-abstract-short" style="display: inline;"> It is widely acknowledged that large and sparse models have higher accuracy than small and dense models under the same model size constraints. This motivates us to train a large model and then remove its redundant neurons or weights by pruning. Most existing works pruned the networks in a deterministic way, the performance of which solely depends on a single pruning criterion and thus lacks variet… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.13183v2-abstract-full').style.display = 'inline'; document.getElementById('2310.13183v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.13183v2-abstract-full" style="display: none;"> It is widely acknowledged that large and sparse models have higher accuracy than small and dense models under the same model size constraints. This motivates us to train a large model and then remove its redundant neurons or weights by pruning. Most existing works pruned the networks in a deterministic way, the performance of which solely depends on a single pruning criterion and thus lacks variety. Instead, in this paper, we propose a model pruning strategy that first generates several pruning masks in a designed random way. Subsequently, along with an effective mask-selection rule, the optimal mask is chosen from the pool of mask candidates. To further enhance efficiency, we introduce an early mask evaluation strategy, mitigating the overhead associated with training multiple masks. Our extensive experiments demonstrate that this approach achieves state-of-the-art performance across eight datasets from GLUE, particularly excelling at high levels of sparsity. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.13183v2-abstract-full').style.display = 'none'; document.getElementById('2310.13183v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.11030">arXiv:2307.11030</a> <span> [<a href="https://arxiv.org/pdf/2307.11030">pdf</a>, <a href="https://arxiv.org/format/2307.11030">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Cluster-aware Semi-supervised Learning: Relational Knowledge Distillation Provably Learns Clustering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dong%2C+Y">Yijun Dong</a>, <a href="/search/cs?searchtype=author&query=Miller%2C+K">Kevin Miller</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qi Lei</a>, <a href="/search/cs?searchtype=author&query=Ward%2C+R">Rachel Ward</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.11030v2-abstract-short" style="display: inline;"> Despite the empirical success and practical significance of (relational) knowledge distillation that matches (the relations of) features between teacher and student models, the corresponding theoretical interpretations remain limited for various knowledge distillation paradigms. In this work, we take an initial step toward a theoretical understanding of relational knowledge distillation (RKD), wit… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.11030v2-abstract-full').style.display = 'inline'; document.getElementById('2307.11030v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.11030v2-abstract-full" style="display: none;"> Despite the empirical success and practical significance of (relational) knowledge distillation that matches (the relations of) features between teacher and student models, the corresponding theoretical interpretations remain limited for various knowledge distillation paradigms. In this work, we take an initial step toward a theoretical understanding of relational knowledge distillation (RKD), with a focus on semi-supervised classification problems. We start by casting RKD as spectral clustering on a population-induced graph unveiled by a teacher model. Via a notion of clustering error that quantifies the discrepancy between the predicted and ground truth clusterings, we illustrate that RKD over the population provably leads to low clustering error. Moreover, we provide a sample complexity bound for RKD with limited unlabeled samples. For semi-supervised learning, we further demonstrate the label efficiency of RKD through a general framework of cluster-aware semi-supervised learning that assumes low clustering errors. Finally, by unifying data augmentation consistency regularization into this cluster-aware framework, we show that despite the common effect of learning accurate clusterings, RKD facilitates a "global" perspective through spectral clustering, whereas consistency regularization focuses on a "local" perspective via expansion. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.11030v2-abstract-full').style.display = 'none'; document.getElementById('2307.11030v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.12383">arXiv:2306.12383</a> <span> [<a href="https://arxiv.org/pdf/2306.12383">pdf</a>, <a href="https://arxiv.org/ps/2306.12383">ps</a>, <a href="https://arxiv.org/format/2306.12383">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Sample Complexity for Quadratic Bandits: Hessian Dependent Bounds and Optimal Algorithms </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yu%2C+Q">Qian Yu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yining Wang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+B">Baihe Huang</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qi Lei</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+J+D">Jason D. Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.12383v3-abstract-short" style="display: inline;"> In stochastic zeroth-order optimization, a problem of practical relevance is understanding how to fully exploit the local geometry of the underlying objective function. We consider a fundamental setting in which the objective function is quadratic, and provide the first tight characterization of the optimal Hessian-dependent sample complexity. Our contribution is twofold. First, from an informatio… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.12383v3-abstract-full').style.display = 'inline'; document.getElementById('2306.12383v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.12383v3-abstract-full" style="display: none;"> In stochastic zeroth-order optimization, a problem of practical relevance is understanding how to fully exploit the local geometry of the underlying objective function. We consider a fundamental setting in which the objective function is quadratic, and provide the first tight characterization of the optimal Hessian-dependent sample complexity. Our contribution is twofold. First, from an information-theoretic point of view, we prove tight lower bounds on Hessian-dependent complexities by introducing a concept called energy allocation, which captures the interaction between the searching algorithm and the geometry of objective functions. A matching upper bound is obtained by solving the optimal energy spectrum. Then, algorithmically, we show the existence of a Hessian-independent algorithm that universally achieves the asymptotic optimal sample complexities for all Hessian instances. The optimal sample complexities achieved by our algorithm remain valid for heavy-tailed noise distributions, which are enabled by a truncation method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.12383v3-abstract-full').style.display = 'none'; document.getElementById('2306.12383v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2212.03714">arXiv:2212.03714</a> <span> [<a href="https://arxiv.org/pdf/2212.03714">pdf</a>, <a href="https://arxiv.org/format/2212.03714">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Reconstructing Training Data from Model Gradient, Provably </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zihan Wang</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+J+D">Jason D. Lee</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qi Lei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2212.03714v3-abstract-short" style="display: inline;"> Understanding when and how much a model gradient leaks information about the training sample is an important question in privacy. In this paper, we present a surprising result: even without training or memorizing the data, we can fully reconstruct the training samples from a single gradient query at a randomly chosen parameter value. We prove the identifiability of the training data under mild con… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.03714v3-abstract-full').style.display = 'inline'; document.getElementById('2212.03714v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2212.03714v3-abstract-full" style="display: none;"> Understanding when and how much a model gradient leaks information about the training sample is an important question in privacy. In this paper, we present a surprising result: even without training or memorizing the data, we can fully reconstruct the training samples from a single gradient query at a randomly chosen parameter value. We prove the identifiability of the training data under mild conditions: with shallow or deep neural networks and a wide range of activation functions. We also present a statistically and computationally efficient algorithm based on tensor decomposition to reconstruct the training data. As a provable attack that reveals sensitive training data, our findings suggest potential severe threats to privacy, especially in federated learning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.03714v3-abstract-full').style.display = 'none'; document.getElementById('2212.03714v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.13983">arXiv:2210.13983</a> <span> [<a href="https://arxiv.org/pdf/2210.13983">pdf</a>, <a href="https://arxiv.org/format/2210.13983">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Optimization for Amortized Inverse Problems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+T">Tianci Liu</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+T">Tong Yang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Q">Quan Zhang</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qi Lei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.13983v3-abstract-short" style="display: inline;"> Incorporating a deep generative model as the prior distribution in inverse problems has established substantial success in reconstructing images from corrupted observations. Notwithstanding, the existing optimization approaches use gradient descent largely without adapting to the non-convex nature of the problem and can be sensitive to initial values, impeding further performance improvement. In t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.13983v3-abstract-full').style.display = 'inline'; document.getElementById('2210.13983v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2210.13983v3-abstract-full" style="display: none;"> Incorporating a deep generative model as the prior distribution in inverse problems has established substantial success in reconstructing images from corrupted observations. Notwithstanding, the existing optimization approaches use gradient descent largely without adapting to the non-convex nature of the problem and can be sensitive to initial values, impeding further performance improvement. In this paper, we propose an efficient amortized optimization scheme for inverse problems with a deep generative prior. Specifically, the optimization task with high degrees of difficulty is decomposed into optimizing a sequence of much easier ones. We provide a theoretical guarantee of the proposed algorithm and empirically validate it on different inverse problems. As a result, our approach outperforms baseline methods qualitatively and quantitatively by a large margin. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.13983v3-abstract-full').style.display = 'none'; document.getElementById('2210.13983v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2209.14434">arXiv:2209.14434</a> <span> [<a href="https://arxiv.org/pdf/2209.14434">pdf</a>, <a href="https://arxiv.org/format/2209.14434">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Efficient Medical Image Assessment via Self-supervised Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+C">Chun-Yin Huang</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qi Lei</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiaoxiao Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2209.14434v1-abstract-short" style="display: inline;"> High-performance deep learning methods typically rely on large annotated training datasets, which are difficult to obtain in many clinical applications due to the high cost of medical image labeling. Existing data assessment methods commonly require knowing the labels in advance, which are not feasible to achieve our goal of 'knowing which data to label.' To this end, we formulate and propose a no… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.14434v1-abstract-full').style.display = 'inline'; document.getElementById('2209.14434v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2209.14434v1-abstract-full" style="display: none;"> High-performance deep learning methods typically rely on large annotated training datasets, which are difficult to obtain in many clinical applications due to the high cost of medical image labeling. Existing data assessment methods commonly require knowing the labels in advance, which are not feasible to achieve our goal of 'knowing which data to label.' To this end, we formulate and propose a novel and efficient data assessment strategy, EXponentiAl Marginal sINgular valuE (EXAMINE) score, to rank the quality of unlabeled medical image data based on their useful latent representations extracted via Self-supervised Learning (SSL) networks. Motivated by theoretical implication of SSL embedding space, we leverage a Masked Autoencoder for feature extraction. Furthermore, we evaluate data quality based on the marginal change of the largest singular value after excluding the data point in the dataset. We conduct extensive experiments on a pathology dataset. Our results indicate the effectiveness and efficiency of our proposed methods for selecting the most valuable data to label. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.14434v1-abstract-full').style.display = 'none'; document.getElementById('2209.14434v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2205.05236">arXiv:2205.05236</a> <span> [<a href="https://arxiv.org/pdf/2205.05236">pdf</a>, <a href="https://arxiv.org/format/2205.05236">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> </div> </div> <p class="title is-5 mathjax"> Reconnecting the Estranged Relationships: Optimizing the Influence Propagation in Evolving Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cai%2C+T">Taotao Cai</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qi Lei</a>, <a href="/search/cs?searchtype=author&query=Sheng%2C+Q+Z">Quan Z. Sheng</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+S">Shuiqiao Yang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+J">Jian Yang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W+E">Wei Emma Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2205.05236v1-abstract-short" style="display: inline;"> Influence Maximization (IM), which aims to select a set of users from a social network to maximize the expected number of influenced users, has recently received significant attention for mass communication and commercial marketing. Existing research efforts dedicated to the IM problem depend on a strong assumption: the selected seed users are willing to spread the information after receiving bene… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.05236v1-abstract-full').style.display = 'inline'; document.getElementById('2205.05236v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2205.05236v1-abstract-full" style="display: none;"> Influence Maximization (IM), which aims to select a set of users from a social network to maximize the expected number of influenced users, has recently received significant attention for mass communication and commercial marketing. Existing research efforts dedicated to the IM problem depend on a strong assumption: the selected seed users are willing to spread the information after receiving benefits from a company or organization. In reality, however, some seed users may be reluctant to spread the information, or need to be paid higher to be motivated. Furthermore, the existing IM works pay little attention to capture user's influence propagation in the future period as well. In this paper, we target a new research problem, named Reconnecting Top-l Relationships (RTlR) query, which aims to find l number of previous existing relationships but being stranged later, such that reconnecting these relationships will maximize the expected benefit of influenced users by the given group in a future period. We prove that the RTlR problem is NP-hard. An efficient greedy algorithm is proposed to answer the RTlR queries with the influence estimation technique and the well-chosen link prediction method to predict the near future network structure. We also design a pruning method to reduce unnecessary probing from candidate edges. Further, a carefully designed order-based algorithm is proposed to accelerate the RTlR queries. Finally, we conduct extensive experiments on real-world datasets to demonstrate the effectiveness and efficiency of our proposed methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2205.05236v1-abstract-full').style.display = 'none'; document.getElementById('2205.05236v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2203.15664">arXiv:2203.15664</a> <span> [<a href="https://arxiv.org/pdf/2203.15664">pdf</a>, <a href="https://arxiv.org/format/2203.15664">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Nearly Minimax Algorithms for Linear Bandits with Shared Representation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yang%2C+J">Jiaqi Yang</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qi Lei</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+J+D">Jason D. Lee</a>, <a href="/search/cs?searchtype=author&query=Du%2C+S+S">Simon S. Du</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2203.15664v1-abstract-short" style="display: inline;"> We give novel algorithms for multi-task and lifelong linear bandits with shared representation. Specifically, we consider the setting where we play $M$ linear bandits with dimension $d$, each for $T$ rounds, and these $M$ bandit tasks share a common $k(\ll d)$ dimensional linear representation. For both the multi-task setting where we play the tasks concurrently, and the lifelong setting where we… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.15664v1-abstract-full').style.display = 'inline'; document.getElementById('2203.15664v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2203.15664v1-abstract-full" style="display: none;"> We give novel algorithms for multi-task and lifelong linear bandits with shared representation. Specifically, we consider the setting where we play $M$ linear bandits with dimension $d$, each for $T$ rounds, and these $M$ bandit tasks share a common $k(\ll d)$ dimensional linear representation. For both the multi-task setting where we play the tasks concurrently, and the lifelong setting where we play tasks sequentially, we come up with novel algorithms that achieve $\widetilde{O}\left(d\sqrt{kMT} + kM\sqrt{T}\right)$ regret bounds, which matches the known minimax regret lower bound up to logarithmic factors and closes the gap in existing results [Yang et al., 2021]. Our main technique include a more efficient estimator for the low-rank linear feature extractor and an accompanied novel analysis for this estimator. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.15664v1-abstract-full').style.display = 'none'; document.getElementById('2203.15664v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">19 pages, 3 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2202.12230">arXiv:2202.12230</a> <span> [<a href="https://arxiv.org/pdf/2202.12230">pdf</a>, <a href="https://arxiv.org/format/2202.12230">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Sample Efficiency of Data Augmentation Consistency Regularization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yang%2C+S">Shuo Yang</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+Y">Yijun Dong</a>, <a href="/search/cs?searchtype=author&query=Ward%2C+R">Rachel Ward</a>, <a href="/search/cs?searchtype=author&query=Dhillon%2C+I+S">Inderjit S. Dhillon</a>, <a href="/search/cs?searchtype=author&query=Sanghavi%2C+S">Sujay Sanghavi</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qi Lei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2202.12230v2-abstract-short" style="display: inline;"> Data augmentation is popular in the training of large neural networks; currently, however, there is no clear theoretical comparison between different algorithmic choices on how to use augmented data. In this paper, we take a step in this direction - we first present a simple and novel analysis for linear regression with label invariant augmentations, demonstrating that data augmentation consistenc… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.12230v2-abstract-full').style.display = 'inline'; document.getElementById('2202.12230v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2202.12230v2-abstract-full" style="display: none;"> Data augmentation is popular in the training of large neural networks; currently, however, there is no clear theoretical comparison between different algorithmic choices on how to use augmented data. In this paper, we take a step in this direction - we first present a simple and novel analysis for linear regression with label invariant augmentations, demonstrating that data augmentation consistency (DAC) is intrinsically more efficient than empirical risk minimization on augmented data (DA-ERM). The analysis is then extended to misspecified augmentations (i.e., augmentations that change the labels), which again demonstrates the merit of DAC over DA-ERM. Further, we extend our analysis to non-linear models (e.g., neural networks) and present generalization bounds. Finally, we perform experiments that make a clean and apples-to-apples comparison (i.e., with no extra modeling or data tweaks) between DAC and DA-ERM using CIFAR-100 and WideResNet; these together demonstrate the superior efficacy of DAC. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2202.12230v2-abstract-full').style.display = 'none'; document.getElementById('2202.12230v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 February, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2201.09020">arXiv:2201.09020</a> <span> [<a href="https://arxiv.org/pdf/2201.09020">pdf</a>, <a href="https://arxiv.org/format/2201.09020">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Bi-CLKT: Bi-Graph Contrastive Learning based Knowledge Tracing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Song%2C+X">Xiangyu Song</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jianxin Li</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qi Lei</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+W">Wei Zhao</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yunliang Chen</a>, <a href="/search/cs?searchtype=author&query=Mian%2C+A">Ajmal Mian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2201.09020v1-abstract-short" style="display: inline;"> The goal of Knowledge Tracing (KT) is to estimate how well students have mastered a concept based on their historical learning of related exercises. The benefit of knowledge tracing is that students' learning plans can be better organised and adjusted, and interventions can be made when necessary. With the recent rise of deep learning, Deep Knowledge Tracing (DKT) has utilised Recurrent Neural Net… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.09020v1-abstract-full').style.display = 'inline'; document.getElementById('2201.09020v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2201.09020v1-abstract-full" style="display: none;"> The goal of Knowledge Tracing (KT) is to estimate how well students have mastered a concept based on their historical learning of related exercises. The benefit of knowledge tracing is that students' learning plans can be better organised and adjusted, and interventions can be made when necessary. With the recent rise of deep learning, Deep Knowledge Tracing (DKT) has utilised Recurrent Neural Networks (RNNs) to accomplish this task with some success. Other works have attempted to introduce Graph Neural Networks (GNNs) and redefine the task accordingly to achieve significant improvements. However, these efforts suffer from at least one of the following drawbacks: 1) they pay too much attention to details of the nodes rather than to high-level semantic information; 2) they struggle to effectively establish spatial associations and complex structures of the nodes; and 3) they represent either concepts or exercises only, without integrating them. Inspired by recent advances in self-supervised learning, we propose a Bi-Graph Contrastive Learning based Knowledge Tracing (Bi-CLKT) to address these limitations. Specifically, we design a two-layer contrastive learning scheme based on an "exercise-to-exercise" (E2E) relational subgraph. It involves node-level contrastive learning of subgraphs to obtain discriminative representations of exercises, and graph-level contrastive learning to obtain discriminative representations of concepts. Moreover, we designed a joint contrastive loss to obtain better representations and hence better prediction performance. Also, we explored two different variants, using RNN and memory-augmented neural networks as the prediction layer for comparison to obtain better representations of exercises and concepts respectively. Extensive experiments on four real-world datasets show that the proposed Bi-CLKT and its variants outperform other baseline models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.09020v1-abstract-full').style.display = 'none'; document.getElementById('2201.09020v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 January, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12pages, 2 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2111.02276">arXiv:2111.02276</a> <span> [<a href="https://arxiv.org/pdf/2111.02276">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1089/soro.2021.0185">10.1089/soro.2021.0185 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Origami-inspired soft twisting actuator </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+D">Diancheng Li</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+D">Dongliang Fan</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+R">Renjie Zhu</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qiaozhi Lei</a>, <a href="/search/cs?searchtype=author&query=Liao%2C+Y">Yuxuan Liao</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+X">Xin Yang</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+Y">Yang Pan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zheng Wang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yang Wu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+S">Sicong Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hongqiang Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2111.02276v2-abstract-short" style="display: inline;"> Soft actuators have shown great advantages in compliance and morphology matched for manipulation of delicate objects and inspection in a confined space. There is an unmet need for a soft actuator that can provide torsional motion to e.g. enlarge working space and increase degrees of freedom. Towards this goal, we present origami-inspired soft pneumatic actuators (OSPAs) made from silicone. The pro… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.02276v2-abstract-full').style.display = 'inline'; document.getElementById('2111.02276v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2111.02276v2-abstract-full" style="display: none;"> Soft actuators have shown great advantages in compliance and morphology matched for manipulation of delicate objects and inspection in a confined space. There is an unmet need for a soft actuator that can provide torsional motion to e.g. enlarge working space and increase degrees of freedom. Towards this goal, we present origami-inspired soft pneumatic actuators (OSPAs) made from silicone. The prototype can output a rotation of more than one revolution (up to 435掳), more significant than its counterparts. Its rotation ratio (=rotation angle/ aspect ratio) is more than 136掳, about twice the largest one in other literature. We describe the design and fabrication method, build the analytical model and simulation model, and analyze and optimize the parameters. Finally, we demonstrate the potentially extensive utility of the OSPAs through their integration into a gripper capable of simultaneously grasping and lifting fragile or flat objects, a versatile robot arm capable of picking and placing items at the right angle with the twisting actuators, and a soft snake robot capable of changing attitude and directions by torsion of the twisting actuators. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.02276v2-abstract-full').style.display = 'none'; document.getElementById('2111.02276v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 November, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 figures. Soft Robotics (2022)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2110.09507">arXiv:2110.09507</a> <span> [<a href="https://arxiv.org/pdf/2110.09507">pdf</a>, <a href="https://arxiv.org/format/2110.09507">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Provable Hierarchy-Based Meta-Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chua%2C+K">Kurtland Chua</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qi Lei</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+J+D">Jason D. Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2110.09507v1-abstract-short" style="display: inline;"> Hierarchical reinforcement learning (HRL) has seen widespread interest as an approach to tractable learning of complex modular behaviors. However, existing work either assume access to expert-constructed hierarchies, or use hierarchy-learning heuristics with no provable guarantees. To address this gap, we analyze HRL in the meta-RL setting, where a learner learns latent hierarchical structure duri… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.09507v1-abstract-full').style.display = 'inline'; document.getElementById('2110.09507v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2110.09507v1-abstract-full" style="display: none;"> Hierarchical reinforcement learning (HRL) has seen widespread interest as an approach to tractable learning of complex modular behaviors. However, existing work either assume access to expert-constructed hierarchies, or use hierarchy-learning heuristics with no provable guarantees. To address this gap, we analyze HRL in the meta-RL setting, where a learner learns latent hierarchical structure during meta-training for use in a downstream task. We consider a tabular setting where natural hierarchical structure is embedded in the transition dynamics. Analogous to supervised meta-learning theory, we provide "diversity conditions" which, together with a tractable optimism-based algorithm, guarantee sample-efficient recovery of this natural hierarchy. Furthermore, we provide regret bounds on a learner using the recovered hierarchy to solve a meta-test task. Our bounds incorporate common notions in HRL literature such as temporal and state/action abstractions, suggesting that our setting and analysis capture important features of HRL in practice. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.09507v1-abstract-full').style.display = 'none'; document.getElementById('2110.09507v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2107.06466">arXiv:2107.06466</a> <span> [<a href="https://arxiv.org/pdf/2107.06466">pdf</a>, <a href="https://arxiv.org/format/2107.06466">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Going Beyond Linear RL: Sample Efficient Neural Function Approximation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+B">Baihe Huang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+K">Kaixuan Huang</a>, <a href="/search/cs?searchtype=author&query=Kakade%2C+S+M">Sham M. Kakade</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+J+D">Jason D. Lee</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qi Lei</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+R">Runzhe Wang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+J">Jiaqi Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2107.06466v2-abstract-short" style="display: inline;"> Deep Reinforcement Learning (RL) powered by neural net approximation of the Q function has had enormous empirical success. While the theory of RL has traditionally focused on linear function approximation (or eluder dimension) approaches, little is known about nonlinear RL with neural net approximations of the Q functions. This is the focus of this work, where we study function approximation with… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.06466v2-abstract-full').style.display = 'inline'; document.getElementById('2107.06466v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2107.06466v2-abstract-full" style="display: none;"> Deep Reinforcement Learning (RL) powered by neural net approximation of the Q function has had enormous empirical success. While the theory of RL has traditionally focused on linear function approximation (or eluder dimension) approaches, little is known about nonlinear RL with neural net approximations of the Q functions. This is the focus of this work, where we study function approximation with two-layer neural networks (considering both ReLU and polynomial activation functions). Our first result is a computationally and statistically efficient algorithm in the generative model setting under completeness for two-layer neural networks. Our second result considers this setting but under only realizability of the neural net function class. Here, assuming deterministic dynamics, the sample complexity scales linearly in the algebraic dimension. In all cases, our results significantly improve upon what can be attained with linear (or eluder dimension) methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.06466v2-abstract-full').style.display = 'none'; document.getElementById('2107.06466v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 December, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 July, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2107.04518">arXiv:2107.04518</a> <span> [<a href="https://arxiv.org/pdf/2107.04518">pdf</a>, <a href="https://arxiv.org/ps/2107.04518">ps</a>, <a href="https://arxiv.org/format/2107.04518">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Optimal Gradient-based Algorithms for Non-concave Bandit Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+B">Baihe Huang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+K">Kaixuan Huang</a>, <a href="/search/cs?searchtype=author&query=Kakade%2C+S+M">Sham M. Kakade</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+J+D">Jason D. Lee</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qi Lei</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+R">Runzhe Wang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+J">Jiaqi Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2107.04518v1-abstract-short" style="display: inline;"> Bandit problems with linear or concave reward have been extensively studied, but relatively few works have studied bandits with non-concave reward. This work considers a large family of bandit problems where the unknown underlying reward function is non-concave, including the low-rank generalized linear bandit problems and two-layer neural network with polynomial activation bandit problem. For the… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.04518v1-abstract-full').style.display = 'inline'; document.getElementById('2107.04518v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2107.04518v1-abstract-full" style="display: none;"> Bandit problems with linear or concave reward have been extensively studied, but relatively few works have studied bandits with non-concave reward. This work considers a large family of bandit problems where the unknown underlying reward function is non-concave, including the low-rank generalized linear bandit problems and two-layer neural network with polynomial activation bandit problem. For the low-rank generalized linear bandit problem, we provide a minimax-optimal algorithm in the dimension, refuting both conjectures in [LMT21, JWWN19]. Our algorithms are based on a unified zeroth-order optimization paradigm that applies in great generality and attains optimal rates in several structured polynomial settings (in the dimension). We further demonstrate the applicability of our algorithms in RL in the generative model setting, resulting in improved sample complexity over prior approaches. Finally, we show that the standard optimistic algorithms (e.g., UCB) are sub-optimal by dimension factors. In the neural net setting (with polynomial activation functions) with noiseless reward, we provide a bandit algorithm with sample complexity equal to the intrinsic algebraic dimension. Again, we show that optimistic approaches have worse sample complexity, polynomial in the extrinsic dimension (which could be exponentially worse in the polynomial degree). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.04518v1-abstract-full').style.display = 'none'; document.getElementById('2107.04518v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 July, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2107.02377">arXiv:2107.02377</a> <span> [<a href="https://arxiv.org/pdf/2107.02377">pdf</a>, <a href="https://arxiv.org/ps/2107.02377">ps</a>, <a href="https://arxiv.org/format/2107.02377">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> A Short Note on the Relationship of Information Gain and Eluder Dimension </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+K">Kaixuan Huang</a>, <a href="/search/cs?searchtype=author&query=Kakade%2C+S+M">Sham M. Kakade</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+J+D">Jason D. Lee</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qi Lei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2107.02377v1-abstract-short" style="display: inline;"> Eluder dimension and information gain are two widely used methods of complexity measures in bandit and reinforcement learning. Eluder dimension was originally proposed as a general complexity measure of function classes, but the common examples of where it is known to be small are function spaces (vector spaces). In these cases, the primary tool to upper bound the eluder dimension is the elliptic… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.02377v1-abstract-full').style.display = 'inline'; document.getElementById('2107.02377v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2107.02377v1-abstract-full" style="display: none;"> Eluder dimension and information gain are two widely used methods of complexity measures in bandit and reinforcement learning. Eluder dimension was originally proposed as a general complexity measure of function classes, but the common examples of where it is known to be small are function spaces (vector spaces). In these cases, the primary tool to upper bound the eluder dimension is the elliptic potential lemma. Interestingly, the elliptic potential lemma also features prominently in the analysis of linear bandits/reinforcement learning and their nonparametric generalization, the information gain. We show that this is not a coincidence -- eluder dimension and information gain are equivalent in a precise sense for reproducing kernel Hilbert spaces. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.02377v1-abstract-full').style.display = 'none'; document.getElementById('2107.02377v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 July, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2106.12108">arXiv:2106.12108</a> <span> [<a href="https://arxiv.org/pdf/2106.12108">pdf</a>, <a href="https://arxiv.org/format/2106.12108">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Near-Optimal Linear Regression under Distribution Shift </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qi Lei</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+W">Wei Hu</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+J+D">Jason D. Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2106.12108v1-abstract-short" style="display: inline;"> Transfer learning is essential when sufficient data comes from the source domain, with scarce labeled data from the target domain. We develop estimators that achieve minimax linear risk for linear regression problems under distribution shift. Our algorithms cover different transfer learning settings including covariate shift and model shift. We also consider when data are generated from either lin… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.12108v1-abstract-full').style.display = 'inline'; document.getElementById('2106.12108v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2106.12108v1-abstract-full" style="display: none;"> Transfer learning is essential when sufficient data comes from the source domain, with scarce labeled data from the target domain. We develop estimators that achieve minimax linear risk for linear regression problems under distribution shift. Our algorithms cover different transfer learning settings including covariate shift and model shift. We also consider when data are generated from either linear or general nonlinear models. We show that linear minimax estimators are within an absolute constant of the minimax risk even among nonlinear estimators for various source/target distributions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.12108v1-abstract-full').style.display = 'none'; document.getElementById('2106.12108v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICML 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2105.02221">arXiv:2105.02221</a> <span> [<a href="https://arxiv.org/pdf/2105.02221">pdf</a>, <a href="https://arxiv.org/format/2105.02221">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> How Fine-Tuning Allows for Effective Meta-Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chua%2C+K">Kurtland Chua</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qi Lei</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+J+D">Jason D. Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2105.02221v1-abstract-short" style="display: inline;"> Representation learning has been widely studied in the context of meta-learning, enabling rapid learning of new tasks through shared representations. Recent works such as MAML have explored using fine-tuning-based metrics, which measure the ease by which fine-tuning can achieve good performance, as proxies for obtaining representations. We present a theoretical framework for analyzing representati… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2105.02221v1-abstract-full').style.display = 'inline'; document.getElementById('2105.02221v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2105.02221v1-abstract-full" style="display: none;"> Representation learning has been widely studied in the context of meta-learning, enabling rapid learning of new tasks through shared representations. Recent works such as MAML have explored using fine-tuning-based metrics, which measure the ease by which fine-tuning can achieve good performance, as proxies for obtaining representations. We present a theoretical framework for analyzing representations derived from a MAML-like algorithm, assuming the available tasks use approximately the same underlying representation. We then provide risk bounds on the best predictor found by fine-tuning via gradient descent, demonstrating that the algorithm can provably leverage the shared structure. The upper bound applies to general function classes, which we demonstrate by instantiating the guarantees of our framework in the logistic regression and neural network settings. In contrast, we establish the existence of settings where any algorithm, using a representation trained with no consideration for task-specific fine-tuning, performs as well as a learner with no access to source tasks in the worst case. This separation result underscores the benefit of fine-tuning-based methods, such as MAML, over methods with "frozen representation" objectives in few-shot learning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2105.02221v1-abstract-full').style.display = 'none'; document.getElementById('2105.02221v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 May, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2102.11203">arXiv:2102.11203</a> <span> [<a href="https://arxiv.org/pdf/2102.11203">pdf</a>, <a href="https://arxiv.org/format/2102.11203">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> A Theory of Label Propagation for Subpopulation Shift </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cai%2C+T">Tianle Cai</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+R">Ruiqi Gao</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+J+D">Jason D. Lee</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qi Lei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2102.11203v3-abstract-short" style="display: inline;"> One of the central problems in machine learning is domain adaptation. Unlike past theoretical work, we consider a new model for subpopulation shift in the input or representation space. In this work, we propose a provably effective framework for domain adaptation based on label propagation. In our analysis, we use a simple but realistic expansion assumption, proposed in \citet{wei2021theoretical}.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.11203v3-abstract-full').style.display = 'inline'; document.getElementById('2102.11203v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2102.11203v3-abstract-full" style="display: none;"> One of the central problems in machine learning is domain adaptation. Unlike past theoretical work, we consider a new model for subpopulation shift in the input or representation space. In this work, we propose a provably effective framework for domain adaptation based on label propagation. In our analysis, we use a simple but realistic expansion assumption, proposed in \citet{wei2021theoretical}. Using a teacher classifier trained on the source domain, our algorithm not only propagates to the target domain but also improves upon the teacher. By leveraging existing generalization bounds, we also obtain end-to-end finite-sample guarantees on the entire algorithm. In addition, we extend our theoretical framework to a more general setting of source-to-target transfer based on a third unlabeled dataset, which can be easily applied in various learning scenarios. Inspired by our theory, we adapt consistency-based semi-supervised learning methods to domain adaptation settings and gain significant improvements. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.11203v3-abstract-full').style.display = 'none'; document.getElementById('2102.11203v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 July, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 February, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICML 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2010.05263">arXiv:2010.05263</a> <span> [<a href="https://arxiv.org/pdf/2010.05263">pdf</a>, <a href="https://arxiv.org/format/2010.05263">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Fast Convergence of Langevin Dynamics on Manifold: Geodesics meet Log-Sobolev </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiao Wang</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qi Lei</a>, <a href="/search/cs?searchtype=author&query=Panageas%2C+I">Ioannis Panageas</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2010.05263v2-abstract-short" style="display: inline;"> Sampling is a fundamental and arguably very important task with numerous applications in Machine Learning. One approach to sample from a high dimensional distribution $e^{-f}$ for some function $f$ is the Langevin Algorithm (LA). Recently, there has been a lot of progress in showing fast convergence of LA even in cases where $f$ is non-convex, notably [53], [39] in which the former paper focuses o… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.05263v2-abstract-full').style.display = 'inline'; document.getElementById('2010.05263v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2010.05263v2-abstract-full" style="display: none;"> Sampling is a fundamental and arguably very important task with numerous applications in Machine Learning. One approach to sample from a high dimensional distribution $e^{-f}$ for some function $f$ is the Langevin Algorithm (LA). Recently, there has been a lot of progress in showing fast convergence of LA even in cases where $f$ is non-convex, notably [53], [39] in which the former paper focuses on functions $f$ defined in $\mathbb{R}^n$ and the latter paper focuses on functions with symmetries (like matrix completion type objectives) with manifold structure. Our work generalizes the results of [53] where $f$ is defined on a manifold $M$ rather than $\mathbb{R}^n$. From technical point of view, we show that KL decreases in a geometric rate whenever the distribution $e^{-f}$ satisfies a log-Sobolev inequality on $M$. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.05263v2-abstract-full').style.display = 'none'; document.getElementById('2010.05263v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 December, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 October, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2008.01064">arXiv:2008.01064</a> <span> [<a href="https://arxiv.org/pdf/2008.01064">pdf</a>, <a href="https://arxiv.org/format/2008.01064">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Predicting What You Already Know Helps: Provable Self-Supervised Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lee%2C+J+D">Jason D. Lee</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qi Lei</a>, <a href="/search/cs?searchtype=author&query=Saunshi%2C+N">Nikunj Saunshi</a>, <a href="/search/cs?searchtype=author&query=Zhuo%2C+J">Jiacheng Zhuo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2008.01064v2-abstract-short" style="display: inline;"> Self-supervised representation learning solves auxiliary prediction tasks (known as pretext tasks) without requiring labeled data to learn useful semantic representations. These pretext tasks are created solely using the input features, such as predicting a missing image patch, recovering the color channels of an image from context, or predicting missing words in text; yet predicting this \textit{… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2008.01064v2-abstract-full').style.display = 'inline'; document.getElementById('2008.01064v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2008.01064v2-abstract-full" style="display: none;"> Self-supervised representation learning solves auxiliary prediction tasks (known as pretext tasks) without requiring labeled data to learn useful semantic representations. These pretext tasks are created solely using the input features, such as predicting a missing image patch, recovering the color channels of an image from context, or predicting missing words in text; yet predicting this \textit{known} information helps in learning representations effective for downstream prediction tasks. We posit a mechanism exploiting the statistical connections between certain {\em reconstruction-based} pretext tasks that guarantee to learn a good representation. Formally, we quantify how the approximate independence between the components of the pretext task (conditional on the label and latent variables) allows us to learn representations that can solve the downstream task by just training a linear layer on top of the learned representation. We prove the linear layer yields small approximation error even for complex ground truth function class and will drastically reduce labeled sample complexity. Next, we show a simple modification of our method leads to nonlinear CCA, analogous to the popular SimSiam algorithm, and show similar guarantees for nonlinear CCA. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2008.01064v2-abstract-full').style.display = 'none'; document.getElementById('2008.01064v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 November, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 August, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2007.07244">arXiv:2007.07244</a> <span> [<a href="https://arxiv.org/pdf/2007.07244">pdf</a>, <a href="https://arxiv.org/ps/2007.07244">ps</a>, <a href="https://arxiv.org/format/2007.07244">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Transformer-XL Based Music Generation with Multiple Sequences of Time-valued Notes </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wu%2C+X">Xianchao Wu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Chengyuan Wang</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qinying Lei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2007.07244v1-abstract-short" style="display: inline;"> Current state-of-the-art AI based classical music creation algorithms such as Music Transformer are trained by employing single sequence of notes with time-shifts. The major drawback of absolute time interval expression is the difficulty of similarity computing of notes that share the same note value yet different tempos, in one or among MIDI files. In addition, the usage of single sequence restri… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2007.07244v1-abstract-full').style.display = 'inline'; document.getElementById('2007.07244v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2007.07244v1-abstract-full" style="display: none;"> Current state-of-the-art AI based classical music creation algorithms such as Music Transformer are trained by employing single sequence of notes with time-shifts. The major drawback of absolute time interval expression is the difficulty of similarity computing of notes that share the same note value yet different tempos, in one or among MIDI files. In addition, the usage of single sequence restricts the model to separately and effectively learn music information such as harmony and rhythm. In this paper, we propose a framework with two novel methods to respectively track these two shortages, one is the construction of time-valued note sequences that liberate note values from tempos and the other is the separated usage of four sequences, namely, former note on to current note on, note on to note off, pitch, and velocity, for jointly training of four Transformer-XL networks. Through training on a 23-hour piano MIDI dataset, our framework generates significantly better and hour-level longer music than three state-of-the-art baselines, namely Music Transformer, DeepJ, and single sequence-based Transformer-XL, evaluated automatically and manually. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2007.07244v1-abstract-full').style.display = 'none'; document.getElementById('2007.07244v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 July, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2003.10392">arXiv:2003.10392</a> <span> [<a href="https://arxiv.org/pdf/2003.10392">pdf</a>, <a href="https://arxiv.org/format/2003.10392">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Steepest Descent Neural Architecture Optimization: Escaping Local Optimum with Signed Neural Splitting </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wu%2C+L">Lemeng Wu</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+M">Mao Ye</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qi Lei</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+J+D">Jason D. Lee</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Q">Qiang Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2003.10392v5-abstract-short" style="display: inline;"> Developing efficient and principled neural architecture optimization methods is a critical challenge of modern deep learning. Recently, Liu et al.[19] proposed a splitting steepest descent (S2D) method that jointly optimizes the neural parameters and architectures based on progressively growing network structures by splitting neurons into multiple copies in a steepest descent fashion. However, S2D… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2003.10392v5-abstract-full').style.display = 'inline'; document.getElementById('2003.10392v5-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2003.10392v5-abstract-full" style="display: none;"> Developing efficient and principled neural architecture optimization methods is a critical challenge of modern deep learning. Recently, Liu et al.[19] proposed a splitting steepest descent (S2D) method that jointly optimizes the neural parameters and architectures based on progressively growing network structures by splitting neurons into multiple copies in a steepest descent fashion. However, S2D suffers from a local optimality issue when all the neurons become "splitting stable", a concept akin to local stability in parametric optimization. In this work, we develop a significant and surprising extension of the splitting descent framework that addresses the local optimality issue. The idea is to observe that the original S2D is unnecessarily restricted to splitting neurons into positive weighted copies. By simply allowing both positive and negative weights during splitting, we can eliminate the appearance of splitting stability in S2D and hence escape the local optima to obtain better performance. By incorporating signed splittings, we significantly extend the optimization power of splitting steepest descent both theoretically and empirically. We verify our method on various challenging benchmarks such as CIFAR-100, ImageNet and ModelNet40, on which we outperform S2D and other advanced methods on learning accurate and energy-efficient neural networks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2003.10392v5-abstract-full').style.display = 'none'; document.getElementById('2003.10392v5-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 March, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2003.08089">arXiv:2003.08089</a> <span> [<a href="https://arxiv.org/pdf/2003.08089">pdf</a>, <a href="https://arxiv.org/format/2003.08089">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Solving Inverse Problems with a Flow-based Noise Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Whang%2C+J">Jay Whang</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qi Lei</a>, <a href="/search/cs?searchtype=author&query=Dimakis%2C+A+G">Alexandros G. Dimakis</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2003.08089v3-abstract-short" style="display: inline;"> We study image inverse problems with a normalizing flow prior. Our formulation views the solution as the maximum a posteriori estimate of the image conditioned on the measurements. This formulation allows us to use noise models with arbitrary dependencies as well as non-linear forward operators. We empirically validate the efficacy of our method on various inverse problems, including compressed se… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2003.08089v3-abstract-full').style.display = 'inline'; document.getElementById('2003.08089v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2003.08089v3-abstract-full" style="display: none;"> We study image inverse problems with a normalizing flow prior. Our formulation views the solution as the maximum a posteriori estimate of the image conditioned on the measurements. This formulation allows us to use noise models with arbitrary dependencies as well as non-linear forward operators. We empirically validate the efficacy of our method on various inverse problems, including compressed sensing with quantized measurements and denoising with highly structured noise patterns. We also present initial theoretical recovery guarantees for solving inverse problems with a flow prior. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2003.08089v3-abstract-full').style.display = 'none'; document.getElementById('2003.08089v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 July, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 March, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2002.09434">arXiv:2002.09434</a> <span> [<a href="https://arxiv.org/pdf/2002.09434">pdf</a>, <a href="https://arxiv.org/ps/2002.09434">ps</a>, <a href="https://arxiv.org/format/2002.09434">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Few-Shot Learning via Learning the Representation, Provably </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Du%2C+S+S">Simon S. Du</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+W">Wei Hu</a>, <a href="/search/cs?searchtype=author&query=Kakade%2C+S+M">Sham M. Kakade</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+J+D">Jason D. Lee</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qi Lei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2002.09434v2-abstract-short" style="display: inline;"> This paper studies few-shot learning via representation learning, where one uses $T$ source tasks with $n_1$ data per task to learn a representation in order to reduce the sample complexity of a target task for which there is only $n_2 (\ll n_1)$ data. Specifically, we focus on the setting where there exists a good \emph{common representation} between source and target, and our goal is to understa… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2002.09434v2-abstract-full').style.display = 'inline'; document.getElementById('2002.09434v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2002.09434v2-abstract-full" style="display: none;"> This paper studies few-shot learning via representation learning, where one uses $T$ source tasks with $n_1$ data per task to learn a representation in order to reduce the sample complexity of a target task for which there is only $n_2 (\ll n_1)$ data. Specifically, we focus on the setting where there exists a good \emph{common representation} between source and target, and our goal is to understand how much of a sample size reduction is possible. First, we study the setting where this common representation is low-dimensional and provide a fast rate of $O\left(\frac{\mathcal{C}\left(桅\right)}{n_1T} + \frac{k}{n_2}\right)$; here, $桅$ is the representation function class, $\mathcal{C}\left(桅\right)$ is its complexity measure, and $k$ is the dimension of the representation. When specialized to linear representation functions, this rate becomes $O\left(\frac{dk}{n_1T} + \frac{k}{n_2}\right)$ where $d (\gg k)$ is the ambient input dimension, which is a substantial improvement over the rate without using representation learning, i.e. over the rate of $O\left(\frac{d}{n_2}\right)$. This result bypasses the $惟(\frac{1}{T})$ barrier under the i.i.d. task assumption, and can capture the desired property that all $n_1T$ samples from source tasks can be \emph{pooled} together for representation learning. Next, we consider the setting where the common representation may be high-dimensional but is capacity-constrained (say in norm); here, we again demonstrate the advantage of representation learning in both high-dimensional linear regression and neural network learning. Our results demonstrate representation learning can fully utilize all $n_1T$ samples from source tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2002.09434v2-abstract-full').style.display = 'none'; document.getElementById('2002.09434v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 March, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 February, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICLR2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2002.06789">arXiv:2002.06789</a> <span> [<a href="https://arxiv.org/pdf/2002.06789">pdf</a>, <a href="https://arxiv.org/format/2002.06789">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> CAT: Customized Adversarial Training for Improved Robustness </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cheng%2C+M">Minhao Cheng</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qi Lei</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+P">Pin-Yu Chen</a>, <a href="/search/cs?searchtype=author&query=Dhillon%2C+I">Inderjit Dhillon</a>, <a href="/search/cs?searchtype=author&query=Hsieh%2C+C">Cho-Jui Hsieh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2002.06789v1-abstract-short" style="display: inline;"> Adversarial training has become one of the most effective methods for improving robustness of neural networks. However, it often suffers from poor generalization on both clean and perturbed data. In this paper, we propose a new algorithm, named Customized Adversarial Training (CAT), which adaptively customizes the perturbation level and the corresponding label for each training sample in adversari… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2002.06789v1-abstract-full').style.display = 'inline'; document.getElementById('2002.06789v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2002.06789v1-abstract-full" style="display: none;"> Adversarial training has become one of the most effective methods for improving robustness of neural networks. However, it often suffers from poor generalization on both clean and perturbed data. In this paper, we propose a new algorithm, named Customized Adversarial Training (CAT), which adaptively customizes the perturbation level and the corresponding label for each training sample in adversarial training. We show that the proposed algorithm achieves better clean and robust accuracy than previous adversarial training methods through extensive experiments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2002.06789v1-abstract-full').style.display = 'none'; document.getElementById('2002.06789v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2002.06768">arXiv:2002.06768</a> <span> [<a href="https://arxiv.org/pdf/2002.06768">pdf</a>, <a href="https://arxiv.org/format/2002.06768">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Last iterate convergence in no-regret learning: constrained min-max optimization for convex-concave landscapes </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qi Lei</a>, <a href="/search/cs?searchtype=author&query=Nagarajan%2C+S+G">Sai Ganesh Nagarajan</a>, <a href="/search/cs?searchtype=author&query=Panageas%2C+I">Ioannis Panageas</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiao Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2002.06768v2-abstract-short" style="display: inline;"> In a recent series of papers it has been established that variants of Gradient Descent/Ascent and Mirror Descent exhibit last iterate convergence in convex-concave zero-sum games. Specifically, \cite{DISZ17, LiangS18} show last iterate convergence of the so called "Optimistic Gradient Descent/Ascent" for the case of \textit{unconstrained} min-max optimization. Moreover, in \cite{Metal} the authors… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2002.06768v2-abstract-full').style.display = 'inline'; document.getElementById('2002.06768v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2002.06768v2-abstract-full" style="display: none;"> In a recent series of papers it has been established that variants of Gradient Descent/Ascent and Mirror Descent exhibit last iterate convergence in convex-concave zero-sum games. Specifically, \cite{DISZ17, LiangS18} show last iterate convergence of the so called "Optimistic Gradient Descent/Ascent" for the case of \textit{unconstrained} min-max optimization. Moreover, in \cite{Metal} the authors show that Mirror Descent with an extra gradient step displays last iterate convergence for convex-concave problems (both constrained and unconstrained), though their algorithm does not follow the online learning framework; it uses extra information rather than \textit{only} the history to compute the next iteration. In this work, we show that "Optimistic Multiplicative-Weights Update (OMWU)" which follows the no-regret online learning framework, exhibits last iterate convergence locally for convex-concave games, generalizing the results of \cite{DP19} where last iterate convergence of OMWU was shown only for the \textit{bilinear case}. We complement our results with experiments that indicate fast convergence of the method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2002.06768v2-abstract-full').style.display = 'none'; document.getElementById('2002.06768v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 February, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 February, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2001.01279">arXiv:2001.01279</a> <span> </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.knosys.2020.106230.">10.1016/j.knosys.2020.106230. <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Deep Transfer Convolutional Neural Network and Extreme Learning Machine for Lung Nodule Diagnosis on CT images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+X">Xufeng Huang</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qiang Lei</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tingli Xie</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yahui Zhang</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+Z">Zhen Hu</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Q">Qi Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2001.01279v2-abstract-short" style="display: inline;"> Some content of the article needs to be kept secret </span> <span class="abstract-full has-text-grey-dark mathjax" id="2001.01279v2-abstract-full" style="display: none;"> Some content of the article needs to be kept secret <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2001.01279v2-abstract-full').style.display = 'none'; document.getElementById('2001.01279v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 April, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 January, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Some content of the article needs to be kept secret</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Knowledge-Based Systems (2020) 106230 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1910.07703">arXiv:1910.07703</a> <span> [<a href="https://arxiv.org/pdf/1910.07703">pdf</a>, <a href="https://arxiv.org/format/1910.07703">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Numerical Analysis">math.NA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Communication-Efficient Asynchronous Stochastic Frank-Wolfe over Nuclear-norm Balls </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhuo%2C+J">Jiacheng Zhuo</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+Q">Qi Lei</a>, <a href="/search/cs?searchtype=author&query=Dimakis%2C+A+G">Alexandros G. Dimakis</a>, <a href="/search/cs?searchtype=author&query=Caramanis%2C+C">Constantine Caramanis</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1910.07703v1-abstract-short" style="display: inline;"> Large-scale machine learning training suffers from two prior challenges, specifically for nuclear-norm constrained problems with distributed systems: the synchronization slowdown due to the straggling workers, and high communication costs. In this work, we propose an asynchronous Stochastic Frank Wolfe (SFW-asyn) method, which, for the first time, solves the two problems simultaneously, while succ… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1910.07703v1-abstract-full').style.display = 'inline'; document.getElementById('1910.07703v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1910.07703v1-abstract-full" style="display: none;"> Large-scale machine learning training suffers from two prior challenges, specifically for nuclear-norm constrained problems with distributed systems: the synchronization slowdown due to the straggling workers, and high communication costs. In this work, we propose an asynchronous Stochastic Frank Wolfe (SFW-asyn) method, which, for the first time, solves the two problems simultaneously, while successfully maintaining the same convergence rate as the vanilla SFW. We implement our algorithm in python (with MPI) to run on Amazon EC2, and demonstrate that SFW-asyn yields speed-ups almost linear to the number of machines compared to the vanilla SFW. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1910.07703v1-abstract-full').style.display = 'none'; document.getElementById('1910.07703v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 October, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2019. </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Lei%2C+Q&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Lei%2C+Q&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Lei%2C+Q&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>