Search | arXiv e-print repository
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. 
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 59 results for author: <span class="mathjax">Lyu, F</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Lyu%2C+F">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Lyu, F"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Lyu%2C+F&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Lyu, F"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Lyu%2C+F&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Lyu%2C+F&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Lyu%2C+F&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02998">arXiv:2502.02998</a> <span> [<a href="https://arxiv.org/pdf/2502.02998">pdf</a>, <a href="https://arxiv.org/format/2502.02998">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Conformal Uncertainty Indicator for Continual Test-Time Adaptation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lyu%2C+F">Fan Lyu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+H">Hanyu Zhao</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+Z">Ziqi Shi</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Ye Liu</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+F">Fuyuan Hu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhang Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+L">Liang Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.02998v1-abstract-short" style="display: inline;"> Continual Test-Time Adaptation (CTTA) aims to adapt models to sequentially changing domains during testing, relying on pseudo-labels for self-adaptation. However, incorrect pseudo-labels can accumulate, leading to performance degradation. To address this, we propose a Conformal Uncertainty Indicator (CUI) for CTTA, leveraging Conformal Prediction (CP) to generate prediction sets that include the t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02998v1-abstract-full').style.display = 'inline'; document.getElementById('2502.02998v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.02998v1-abstract-full" style="display: none;"> Continual Test-Time Adaptation (CTTA) aims to adapt models to sequentially changing domains during testing, relying on pseudo-labels for self-adaptation. However, incorrect pseudo-labels can accumulate, leading to performance degradation. 
To address this, we propose a Conformal Uncertainty Indicator (CUI) for CTTA, leveraging Conformal Prediction (CP) to generate prediction sets that include the true label with a specified coverage probability. Since domain shifts can lower the coverage than expected, making CP unreliable, we dynamically compensate for the coverage by measuring both domain and data differences. Reliable pseudo-labels from CP are then selectively utilized to enhance adaptation. Experiments confirm that CUI effectively estimates uncertainty and improves adaptation performance across various existing CTTA methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02998v1-abstract-full').style.display = 'none'; document.getElementById('2502.02998v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.12654">arXiv:2412.12654</a> <span> [<a href="https://arxiv.org/pdf/2412.12654">pdf</a>, <a href="https://arxiv.org/format/2412.12654">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> CALA: A Class-Aware Logit Adapter for Few-Shot Class-Incremental Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+C">Chengyan Liu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+L">Linglan Zhao</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+F">Fan Lyu</a>, <a href="/search/cs?searchtype=author&query=Du%2C+K">Kaile Du</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+F">Fuyuan Hu</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+T">Tao Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.12654v1-abstract-short" style="display: inline;"> Few-Shot Class-Incremental Learning (FSCIL) defines a practical but challenging task where models are required to continuously learn novel concepts with only a few training samples. Due to data scarcity, existing FSCIL methods resort to training a backbone with abundant base data and then keeping it frozen afterward. However, the above operation often causes the backbone to overfit to base classes… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.12654v1-abstract-full').style.display = 'inline'; document.getElementById('2412.12654v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.12654v1-abstract-full" style="display: none;"> Few-Shot Class-Incremental Learning (FSCIL) defines a practical but challenging task where models are required to continuously learn novel concepts with only a few training samples. Due to data scarcity, existing FSCIL methods resort to training a backbone with abundant base data and then keeping it frozen afterward. 
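The CUI entry turns on two concrete ingredients: conformal prediction sets calibrated to a target coverage, and pseudo-label filtering based on those sets. The sketch below only illustrates those ingredients, not the paper's method; it omits the dynamic coverage compensation described in the abstract, and the function names and toy data are invented for the example (standard split-conformal sets over softmax scores, keeping a pseudo-label only when the set is a singleton).

```python
import numpy as np

def conformal_threshold(cal_probs, cal_labels, alpha=0.1):
    """Split-conformal threshold from a labelled calibration set.

    Nonconformity score = 1 - softmax probability of the true class;
    the returned qhat is the finite-sample-corrected (1 - alpha) quantile.
    """
    n = len(cal_labels)
    scores = 1.0 - cal_probs[np.arange(n), cal_labels]
    q = np.ceil((n + 1) * (1 - alpha)) / n
    return np.quantile(scores, min(q, 1.0))

def prediction_set(probs, qhat):
    """All classes whose nonconformity score stays within the threshold."""
    return np.where(1.0 - probs <= qhat)[0]

def select_pseudo_labels(test_probs, qhat):
    """Keep a pseudo-label only when its prediction set is a singleton,
    i.e. the model is unambiguous at the requested coverage level."""
    kept, labels = [], []
    for i, p in enumerate(test_probs):
        s = prediction_set(p, qhat)
        if len(s) == 1:
            kept.append(i)
            labels.append(int(s[0]))
    return np.array(kept), np.array(labels)

# Toy usage with random softmax-like outputs (10 classes).
rng = np.random.default_rng(0)
cal_probs = rng.dirichlet(np.ones(10) * 0.3, size=500)
cal_labels = cal_probs.argmax(axis=1)            # pretend the argmax is correct
qhat = conformal_threshold(cal_probs, cal_labels, alpha=0.1)
test_probs = rng.dirichlet(np.ones(10) * 0.3, size=100)
idx, pseudo = select_pseudo_labels(test_probs, qhat)
print(f"qhat={qhat:.3f}, kept {len(idx)}/100 confident pseudo-labels")
```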
2. arXiv:2412.12654 [pdf, other]  cs.CV
   CALA: A Class-Aware Logit Adapter for Few-Shot Class-Incremental Learning
   Authors: Chengyan Liu, Linglan Zhao, Fan Lyu, Kaile Du, Fuyuan Hu, Tao Zhou
   Abstract: Few-Shot Class-Incremental Learning (FSCIL) defines a practical but challenging task where models are required to continuously learn novel concepts with only a few training samples. Due to data scarcity, existing FSCIL methods resort to training a backbone with abundant base data and then keeping it frozen afterward. However, this often causes the backbone to overfit to base classes while overlooking the novel ones, leading to severe confusion between them. To address this issue, we propose the Class-Aware Logit Adapter (CALA). Our method involves a lightweight adapter that learns to rectify biased predictions through a pseudo-incremental learning paradigm. In the real FSCIL process, we use the learned adapter to dynamically generate robust balancing factors. These factors can adjust confused novel instances back to their true label space based on their similarity to base classes: when confusion is more likely because a novel instance closely resembles the base classes, greater rectification is applied. Notably, CALA operates at the classifier level and preserves the original feature space, so it can be flexibly plugged into most existing FSCIL methods for improved performance. Experiments on three benchmark datasets consistently validate the effectiveness and flexibility of CALA. Code will be available upon acceptance.
   Submitted 17 December, 2024; originally announced December 2024.
   Comments: 10 pages
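To make the "balancing factor" idea in the CALA abstract concrete, here is a toy sketch that boosts novel-class logits in proportion to an instance's cosine similarity to base-class prototypes. This is an assumption-laden illustration of the general mechanism, not the paper's adapter; every name, shape, and the specific form of the factor are hypothetical.

```python
import numpy as np

def rectify_logits(logits, feat, base_protos, novel_offset, scale=1.0):
    """Toy class-aware rectification of a single instance's logits.

    The more the feature resembles the base-class prototypes, the larger
    the balancing factor added to the novel-class logits, nudging
    confused instances back toward the novel label space.  `novel_offset`
    is the index where novel classes start in the logit vector.
    """
    sims = base_protos @ feat / (
        np.linalg.norm(base_protos, axis=1) * np.linalg.norm(feat) + 1e-8)
    balance = scale * sims.max()            # per-instance balancing factor
    adjusted = logits.copy()
    adjusted[novel_offset:] += balance      # boost novel classes when confusion is likely
    return adjusted

# Toy usage: 60 base classes, 5 novel classes, 64-d features.
rng = np.random.default_rng(1)
base_protos = rng.normal(size=(60, 64))
feat = base_protos[3] + 0.1 * rng.normal(size=64)   # an instance resembling base class 3
logits = rng.normal(size=65)
print(rectify_logits(logits, feat, base_protos, novel_offset=60)[60:])
```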
3. arXiv:2411.15731 [pdf, other]  cs.IR, cs.AI
   Fusion Matters: Learning Fusion in Deep Click-through Rate Prediction Models
   Authors: Kexin Zhang, Fuyuan Lyu, Xing Tang, Dugang Liu, Chen Ma, Kaize Ding, Xiuqiang He, Xue Liu
   Abstract: The evolution of previous Click-Through Rate (CTR) models has mainly been driven by proposing complex components, whether shallow or deep, that are adept at modeling feature interactions. However, there has been less focus on improving fusion design. Instead, two naive solutions, stacked and parallel fusion, are commonly used. Both solutions rely on pre-determined fusion connections and fixed fusion operations. It has been repetitively observed that changes in fusion design may result in different performances, highlighting the critical role that fusion plays in CTR models. While there have been attempts to refine these basic fusion strategies, these efforts have often been constrained to specific settings or dependent on specific components. Neural architecture search has also been introduced to partially deal with fusion design, but it comes with limitations. The complexity of the search space can lead to inefficient and ineffective results. To bridge this gap, we introduce OptFusion, a method that automates the learning of fusion, encompassing both the connection learning and the operation selection. We have proposed a one-shot learning algorithm tackling these tasks concurrently. Our experiments are conducted over three large-scale datasets. Extensive experiments prove both the effectiveness and efficiency of OptFusion in improving CTR model performance. Our code implementation is available at https://github.com/kexin-kxzhang/OptFusion.
   Submitted 24 November, 2024; originally announced November 2024.
   Comments: Accepted by WSDM 2025
4. arXiv:2410.12229 [pdf, other]  cs.IR, cs.AI
   Comprehending Knowledge Graphs with Large Language Models for Recommender Systems
   Authors: Ziqiang Cui, Yunpeng Weng, Xing Tang, Fuyuan Lyu, Dugang Liu, Xiuqiang He, Chen Ma
   Abstract: In recent years, the introduction of knowledge graphs (KGs) has significantly advanced recommender systems by facilitating the discovery of potential associations between items. However, existing methods still face several limitations. First, most KGs suffer from missing facts or limited scopes. Second, existing methods convert textual information in KGs into IDs, resulting in the loss of natural semantic connections between different items. Third, existing methods struggle to capture high-order connections in the global KG. To address these limitations, we propose a novel method called CoLaKG, which leverages large language models (LLMs) to improve KG-based recommendations. The extensive world knowledge and remarkable reasoning capabilities of LLMs enable our method to supplement missing facts in KGs. Additionally, their powerful text understanding abilities allow for better utilization of semantic information. Specifically, CoLaKG extracts useful information from the KG at both local and global levels. By employing item-centered subgraph extraction and prompt engineering, it accurately captures the local KG. Subsequently, through retrieval-based neighbor enhancement, it supplements the current item by capturing related items from the entire KG, thereby effectively utilizing global information. The local and global information extracted by the LLM are effectively integrated into the recommendation model through a representation fusion module and a retrieval-augmented representation learning module, respectively, thereby improving recommendation performance. Extensive experiments on four real-world datasets demonstrate the superiority of our method.
   Submitted 7 February, 2025; v1 submitted 16 October, 2024; originally announced October 2024.

5. arXiv:2410.05193 [pdf, other]  cs.CL
   RevisEval: Improving LLM-as-a-Judge via Response-Adapted References
   Authors: Qiyuan Zhang, Yufei Wang, Tiezheng YU, Yuxin Jiang, Chuhan Wu, Liangyou Li, Yasheng Wang, Xin Jiang, Lifeng Shang, Ruiming Tang, Fuyuan Lyu, Chen Ma
   Abstract: With significant efforts in recent studies, LLM-as-a-Judge has become a cost-effective alternative to human evaluation for assessing text generation quality in a wide range of tasks. However, there still remains a reliability gap between LLM-as-a-Judge and human evaluation. One important reason is the lack of guided oracles in the evaluation process. Motivated by the role of references pervasively used in classic text evaluation, we introduce RevisEval, a novel text generation evaluation paradigm via response-adapted references. RevisEval is driven by the key observation that an ideal reference should maintain the necessary relevance to the response to be evaluated. Specifically, RevisEval leverages the text revision capabilities of large language models (LLMs) to adaptively revise the response, then treats the revised text as the reference (response-adapted reference) for the subsequent evaluation. Extensive experiments demonstrate that RevisEval outperforms traditional reference-free and reference-based evaluation paradigms that use LLM-as-a-Judge across NLG tasks and open-ended instruction-following tasks. More importantly, our response-adapted references can further boost the classical text metrics, e.g., BLEU and BERTScore, compared to traditional references and even rival the LLM-as-a-Judge. A detailed analysis is also conducted to confirm RevisEval's effectiveness in bias reduction, the impact of inference cost, and reference relevance.
   Submitted 7 October, 2024; originally announced October 2024.
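The RevisEval pipeline, as the abstract describes it, is: revise the response with an LLM, treat the revision as a response-adapted reference, then apply any reference-based metric. The sketch below only mirrors that control flow; the LLM revision step is a stub, and a tiny token-level F1 stands in for BLEU or BERTScore. Everything here is hypothetical scaffolding, not the paper's code.

```python
from collections import Counter

def revise_with_llm(instruction, response):
    """Placeholder for the LLM revision call the paradigm relies on.

    In practice this would prompt an LLM to minimally edit `response`
    so that it correctly answers `instruction`; here it is stubbed.
    """
    return response + " (revised for correctness)"   # stand-in only

def token_f1(candidate, reference):
    """Tiny reference-based metric standing in for BLEU/BERTScore."""
    c, r = Counter(candidate.lower().split()), Counter(reference.lower().split())
    overlap = sum((c & r).values())
    if overlap == 0:
        return 0.0
    precision = overlap / sum(c.values())
    recall = overlap / sum(r.values())
    return 2 * precision * recall / (precision + recall)

def response_adapted_score(instruction, response):
    """Score a response against its own response-adapted reference."""
    adapted_reference = revise_with_llm(instruction, response)
    return token_f1(response, adapted_reference)

print(response_adapted_score("Summarise the paper.", "The paper proposes a new judge."))
```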
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.14874">arXiv:2409.14874</a> <span> [<a href="https://arxiv.org/pdf/2409.14874">pdf</a>, <a href="https://arxiv.org/format/2409.14874">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Towards Ground-truth-free Evaluation of Any Segmentation in Medical Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Senbi%2C+A">Ahjol Senbi</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+T">Tianyu Huang</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+F">Fei Lyu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Q">Qing Li</a>, <a href="/search/cs?searchtype=author&query=Tao%2C+Y">Yuhui Tao</a>, <a href="/search/cs?searchtype=author&query=Shao%2C+W">Wei Shao</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Q">Qiang Chen</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Chengyan Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shuo Wang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+T">Tao Zhou</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yizhe Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.14874v2-abstract-short" style="display: inline;"> We explore the feasibility and potential of building a ground-truth-free evaluation model to assess the quality of segmentations generated by the Segment Anything Model (SAM) and its variants in medical imaging. This evaluation model estimates segmentation quality scores by analyzing the coherence and consistency between the input images and their corresponding segmentation predictions. Based on p… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.14874v2-abstract-full').style.display = 'inline'; document.getElementById('2409.14874v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.14874v2-abstract-full" style="display: none;"> We explore the feasibility and potential of building a ground-truth-free evaluation model to assess the quality of segmentations generated by the Segment Anything Model (SAM) and its variants in medical imaging. This evaluation model estimates segmentation quality scores by analyzing the coherence and consistency between the input images and their corresponding segmentation predictions. Based on prior research, we frame the task of training this model as a regression problem within a supervised learning framework, using Dice scores (and optionally other metrics) along with mean squared error to compute the training loss. The model is trained utilizing a large collection of public datasets of medical images with segmentation predictions from SAM and its variants. 
We name this model EvanySeg (Evaluation of Any Segmentation in Medical Images). Our exploration of convolution-based models (e.g., ResNet) and transformer-based models (e.g., ViT) suggested that ViT yields better performance for this task. EvanySeg can be employed for various tasks, including: (1) identifying poorly segmented samples by detecting low-percentile segmentation quality scores; (2) benchmarking segmentation models without ground truth by averaging quality scores across test samples; (3) alerting human experts to poor-quality segmentation predictions during human-AI collaboration by applying a threshold within the score space; and (4) selecting the best segmentation prediction for each test sample at test time when multiple segmentation models are available, by choosing the prediction with the highest quality score. Models and code will be made available at https://github.com/ahjolsenbics/EvanySeg. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.14874v2-abstract-full').style.display = 'none'; document.getElementById('2409.14874v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 15 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.09072">arXiv:2409.09072</a> <span> [<a href="https://arxiv.org/pdf/2409.09072">pdf</a>, <a href="https://arxiv.org/format/2409.09072">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Joint Model Assignment and Resource Allocation for Cost-Effective Mobile Generative Services </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gao%2C+S">Shuangwei Gao</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+P">Peng Yang</a>, <a href="/search/cs?searchtype=author&query=Kong%2C+Y">Yuxin Kong</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+F">Feng Lyu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+N">Ning Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.09072v1-abstract-short" style="display: inline;"> Artificial Intelligence Generated Content (AIGC) services can efficiently satisfy user-specified content creation demands, but the high computational requirements pose various challenges to supporting mobile users at scale. 
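EvanySeg, as described above, is a regressor that maps an (image, predicted mask) pair to a quality score and is trained with MSE against Dice scores. The following sketch shows that training setup with a small CNN and random tensors; the paper reports that a ViT backbone works better, and all class and variable names here are invented for the example.

```python
import torch
import torch.nn as nn

class SegQualityRegressor(nn.Module):
    """Predict a segmentation quality score from (image, predicted mask).

    The image and the binary mask are stacked as channels; the regression
    target is the Dice score of the mask against ground truth, which is
    only available at training time.
    """
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(2, 16, 3, stride=2, padding=1), nn.ReLU(),
            nn.Conv2d(16, 32, 3, stride=2, padding=1), nn.ReLU(),
            nn.AdaptiveAvgPool2d(1), nn.Flatten(),
            nn.Linear(32, 1), nn.Sigmoid(),   # Dice lies in [0, 1]
        )

    def forward(self, image, mask):
        return self.net(torch.cat([image, mask], dim=1)).squeeze(1)

def dice_score(pred, target, eps=1e-6):
    """Dice between binary masks, used only to build training targets."""
    inter = (pred * target).sum(dim=(1, 2, 3))
    return (2 * inter + eps) / (pred.sum(dim=(1, 2, 3)) + target.sum(dim=(1, 2, 3)) + eps)

# Toy training step on random data (grayscale images, binary SAM-style masks).
model = SegQualityRegressor()
opt = torch.optim.Adam(model.parameters(), lr=1e-3)
image = torch.rand(4, 1, 64, 64)
pred_mask = (torch.rand(4, 1, 64, 64) > 0.5).float()
gt_mask = (torch.rand(4, 1, 64, 64) > 0.5).float()
target = dice_score(pred_mask, gt_mask)           # regression target
loss = nn.functional.mse_loss(model(image, pred_mask), target)
loss.backward()
opt.step()
print(float(loss))
```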
7. arXiv:2409.09072 [pdf, other]  cs.DC, cs.AI, cs.LG
   Joint Model Assignment and Resource Allocation for Cost-Effective Mobile Generative Services
   Authors: Shuangwei Gao, Peng Yang, Yuxin Kong, Feng Lyu, Ning Zhang
   Abstract: Artificial Intelligence Generated Content (AIGC) services can efficiently satisfy user-specified content creation demands, but the high computational requirements pose various challenges to supporting mobile users at scale. In this paper, we present our design of an edge-enabled AIGC service provisioning system to properly assign computing tasks of generative models to edge servers, thereby improving overall user experience and reducing content generation latency. Specifically, once the edge server receives user-requested task prompts, it dynamically assigns appropriate models and allocates computing resources based on features of each category of prompts. The generated contents are then delivered to users. The key to this system is a proposed probabilistic model assignment approach, which estimates the quality score of generated contents for each prompt based on category labels. Next, we introduce a heuristic algorithm that enables adaptive configuration of both generation steps and resource allocation, according to the various task requests received by each generative model on the edge. Simulation results demonstrate that the designed system can effectively enhance the quality of generated content by up to 4.7% while reducing response delay by up to 39.1% compared to benchmarks.
   Submitted 8 September, 2024; originally announced September 2024.

8. arXiv:2409.05303 [pdf, other]  cs.LG, cs.AI
   Resource-Efficient Generative AI Model Deployment in Mobile Edge Networks
   Authors: Yuxin Liang, Peng Yang, Yuanyuan He, Feng Lyu
   Abstract: The surging development of Artificial Intelligence-Generated Content (AIGC) marks a transformative era of content creation and production. Edge servers promise attractive benefits, e.g., reduced service delay and backhaul traffic load, for hosting AIGC services compared to cloud-based solutions. However, the scarcity of available resources on the edge poses significant challenges in deploying generative AI models. In this paper, by characterizing the resource and delay demands of typical generative AI models, we find that the consumption of storage and GPU memory, as well as the model switching delay represented by I/O delay during the preloading phase, are significant and vary across models. These multidimensional coupling factors render it difficult to make efficient edge model deployment decisions. Hence, we present a collaborative edge-cloud framework aiming to properly manage generative AI model deployment on the edge. Specifically, we formulate the edge model deployment problem, considering the heterogeneous features of models, as an optimization problem, and propose a model-level decision selection algorithm to solve it. It enables pooled resource sharing and optimizes the trade-off between resource consumption and delay in edge generative AI model deployment. Simulation results validate the efficacy of the proposed algorithm compared with baselines, demonstrating its potential to reduce overall costs by providing feature-aware model deployment decisions.
   Submitted 8 September, 2024; originally announced September 2024.
9. arXiv:2408.12161 [pdf, other]  cs.CV
   Rebalancing Multi-Label Class-Incremental Learning
   Authors: Kaile Du, Yifan Zhou, Fan Lyu, Yuyang Li, Junzhou Xie, Yixi Shen, Fuyuan Hu, Guangcan Liu
   Abstract: Multi-label class-incremental learning (MLCIL) is essential for real-world multi-label applications, allowing models to learn new labels while retaining previously learned knowledge continuously. However, recent MLCIL approaches can only achieve suboptimal performance due to the oversight of the positive-negative imbalance problem, which manifests at both the label and loss levels because of the task-level partial label issue. The imbalance at the label level arises from the substantial absence of negative labels, while the imbalance at the loss level stems from the asymmetric contributions of the positive and negative loss parts to the optimization. To address the issue above, we propose a Rebalance framework for both the Loss and Label levels (RebLL), which integrates two key modules: asymmetric knowledge distillation (AKD) and online relabeling (OR). AKD is proposed to rebalance at the loss level by emphasizing the negative label learning in classification loss and down-weighting the contribution of overconfident predictions in distillation loss. OR is designed for label rebalance, which restores the original class distribution in memory by online relabeling the missing classes. Our comprehensive experiments on the PASCAL VOC and MS-COCO datasets demonstrate that this rebalancing strategy significantly improves performance, achieving new state-of-the-art results even with a vanilla CNN backbone.
   Submitted 22 August, 2024; originally announced August 2024.
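The AKD component of RebLL down-weights overconfident predictions in the distillation loss while keeping a standard multi-label classification term. The sketch below expresses that idea with a focal-style weight on a per-label BCE distillation term; the exact losses in the paper differ, and the function names and the weighting form are assumptions made for illustration.

```python
import torch
import torch.nn.functional as F

def asymmetric_distillation_loss(student_logits, teacher_logits, gamma=2.0):
    """Down-weight overconfident teacher predictions in the distillation term.

    The per-label weight (1 - p_teacher)^gamma shrinks toward zero as the old
    model becomes very confident, so stale, overconfident positives do not
    dominate the loss (a focal-style reweighting, used here for illustration).
    """
    p_t = torch.sigmoid(teacher_logits)
    p_s = torch.sigmoid(student_logits)
    per_label = F.binary_cross_entropy(p_s, p_t, reduction="none")
    weight = (1.0 - p_t).pow(gamma)
    return (weight * per_label).mean()

def mlcil_loss(student_logits, teacher_logits, labels, lam=1.0):
    """Classification BCE on observed labels plus asymmetric distillation."""
    cls = F.binary_cross_entropy_with_logits(student_logits, labels)
    kd = asymmetric_distillation_loss(student_logits, teacher_logits)
    return cls + lam * kd

# Toy usage: batch of 8 samples, 20 labels.
student = torch.randn(8, 20, requires_grad=True)
teacher = torch.randn(8, 20)
labels = (torch.rand(8, 20) > 0.8).float()
print(float(mlcil_loss(student, teacher, labels)))
```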
10. arXiv:2408.08585 [pdf, other]  cs.IR, cs.LG
   OptDist: Learning Optimal Distribution for Customer Lifetime Value Prediction
   Authors: Yunpeng Weng, Xing Tang, Zhenhao Xu, Fuyuan Lyu, Dugang Liu, Zexu Sun, Xiuqiang He
   Abstract: Customer Lifetime Value (CLTV) prediction is a critical task in business applications. Accurately predicting CLTV is challenging in real-world business scenarios, as the distribution of CLTV is complex and mutable. Firstly, there is a large number of users without any consumption, alongside a long-tailed part that is too complex to fit. Secondly, the small set of high-value users spent orders of magnitude more than a typical user, leading to a wide range of the CLTV distribution which is hard to capture in a single distribution. Existing approaches for CLTV estimation either assume a prior probability distribution and fit a single group of distribution-related parameters for all samples, or directly learn from the posterior distribution with manually predefined buckets in a heuristic manner. However, all these methods fail to handle complex and mutable distributions. In this paper, we propose a novel optimal distribution selection model, OptDist, for CLTV prediction, which utilizes an adaptive optimal sub-distribution selection mechanism to improve the accuracy of complex distribution modeling. Specifically, OptDist trains several candidate sub-distribution networks in the distribution learning module (DLM) for modeling the probability distribution of CLTV. Then, a distribution selection module (DSM) is proposed to select the sub-distribution for each sample, thus making the selection automatically and adaptively. Besides, we design an alignment mechanism that connects both modules, which effectively guides the optimization. We conduct extensive experiments on two public datasets and one private dataset to verify that OptDist outperforms state-of-the-art baselines. Furthermore, OptDist has been deployed on a large-scale financial platform for customer acquisition marketing campaigns and the online experiments also demonstrate the effectiveness of OptDist.
   Submitted 16 August, 2024; originally announced August 2024.
   Comments: CIKM 2024
11. arXiv:2407.18526 [pdf, other]  cs.LG
   Constructing Enhanced Mutual Information for Online Class-Incremental Learning
   Authors: Huan Zhang, Fan Lyu, Shenghua Fan, Yujin Zheng, Dingwen Wang
   Abstract: Online Class-Incremental continual Learning (OCIL) addresses the challenge of continuously learning from a single-channel data stream, adapting to new tasks while mitigating catastrophic forgetting. Recently, Mutual Information (MI)-based methods have shown promising performance in OCIL. However, existing MI-based methods treat various knowledge components in isolation, ignoring the knowledge confusion across tasks. This narrow focus on simple MI knowledge alignment may lead to old tasks being easily forgotten with the introduction of new tasks, risking the loss of common parts between past and present knowledge. To address this, we analyze the MI relationships from the perspectives of diversity, representativeness, and separability, and propose an Enhanced Mutual Information (EMI) method based on knowledge decoupling. EMI consists of Diversity Mutual Information (DMI), Representativeness Mutual Information (RMI), and Separability Mutual Information (SMI). DMI diversifies intra-class sample features by considering the similarity relationships among inter-class sample features to enable the network to learn more general knowledge. RMI summarizes representative features for each category and aligns sample features with these representative features, making the intra-class sample distribution more compact. SMI establishes MI relationships for inter-class representative features, enhancing the stability of representative features while increasing the distinction between inter-class representative features, thus creating clear boundaries between classes. Extensive experimental results on widely used benchmark datasets demonstrate the superior performance of EMI over state-of-the-art baseline methods.
   Submitted 26 July, 2024; originally announced July 2024.
This paper introduces Stable Parallel Continual Learning (SPCL), a novel approach that enhances the training stability of PCL for both forward and backward propagation. For the forward propagation, we apply Doubly-block Toeplitz (DBT) matrix-based orthogonality constraints to network parameters to ensure stable and consistent propagation. For the backward propagation, we employ orthogonal decomposition for gradient management, which stabilizes backpropagation and mitigates gradient conflicts across tasks. By ensuring gradient orthogonality and minimizing the condition number, SPCL effectively stabilizes gradient descent in complex optimization tasks. Experimental results demonstrate that SPCL outperforms state-of-the-art methods and achieves better training stability. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.08214v1-abstract-full').style.display = 'none'; document.getElementById('2407.08214v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.02253">arXiv:2407.02253</a> <span> [<a href="https://arxiv.org/pdf/2407.02253">pdf</a>, <a href="https://arxiv.org/format/2407.02253">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Parameter-Selective Continual Test-Time Adaptation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tian%2C+J">Jiaxu Tian</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+F">Fan Lyu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.02253v1-abstract-short" style="display: inline;"> Continual Test-Time Adaptation (CTTA) aims to adapt a pretrained model to ever-changing environments during the test time under continuous domain shifts. Most existing CTTA approaches are based on the Mean Teacher (MT) structure, which contains a student and a teacher model, where the student is updated using the pseudo-labels from the teacher model, and the teacher is then updated by exponential… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.02253v1-abstract-full').style.display = 'inline'; document.getElementById('2407.02253v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.02253v1-abstract-full" style="display: none;"> Continual Test-Time Adaptation (CTTA) aims to adapt a pretrained model to ever-changing environments during the test time under continuous domain shifts. Most existing CTTA approaches are based on the Mean Teacher (MT) structure, which contains a student and a teacher model, where the student is updated using the pseudo-labels from the teacher model, and the teacher is then updated by an exponential moving average strategy.
However, these methods update the MT model indiscriminately on all parameters of the model. That is, some critical parameters involved in sharing knowledge across different domains may be erased, intensifying error accumulation and catastrophic forgetting. In this paper, we introduce the Parameter-Selective Mean Teacher (PSMT) method, which is capable of effectively updating the critical parameters within the MT network under domain shifts. First, we introduce a selective distillation mechanism in the student model, which utilizes past knowledge to regularize novel knowledge, thereby mitigating the impact of error accumulation. Second, to avoid catastrophic forgetting, in the teacher model, we create a mask through Fisher information to selectively update parameters via exponential moving average, with preservation measures applied to crucial parameters. Extensive experimental results verify that PSMT outperforms state-of-the-art methods across multiple benchmark datasets. Our code is available at \url{https://github.com/JiaxuTian/PSMT}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.02253v1-abstract-full').style.display = 'none'; document.getElementById('2407.02253v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.01300">arXiv:2407.01300</a> <span> [<a href="https://arxiv.org/pdf/2407.01300">pdf</a>, <a href="https://arxiv.org/format/2407.01300">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Collaborative Performance Prediction for Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Q">Qiyuan Zhang</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+F">Fuyuan Lyu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xue Liu</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+C">Chen Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.01300v2-abstract-short" style="display: inline;"> Comprehensively understanding and accurately predicting the performance of large language models across diverse downstream tasks has emerged as a pivotal challenge in NLP research. The pioneering scaling law on downstream works demonstrated intrinsic similarities within model families and utilized such similarities for performance prediction.
However, they tend to overlook the similarities between… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.01300v2-abstract-full').style.display = 'inline'; document.getElementById('2407.01300v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.01300v2-abstract-full" style="display: none;"> Comprehensively understanding and accurately predicting the performance of large language models across diverse downstream tasks has emerged as a pivotal challenge in NLP research. The pioneering scaling law on downstream works demonstrated intrinsic similarities within model families and utilized such similarities for performance prediction. However, they tend to overlook the similarities between model families and only consider design factors listed in the original scaling law. To overcome these limitations, we introduce a novel framework, Collaborative Performance Prediction (CPP), which significantly enhances prediction accuracy by leveraging the historical performance of various models on downstream tasks and other design factors for both model and task. We also collect a collaborative dataset sourced from online platforms containing both historical performance and additional design factors. With the support of the collaborative data, CPP not only surpasses traditional scaling laws in predicting the performance of scaled LLMs but also facilitates a detailed analysis of factor importance, an area previously overlooked. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.01300v2-abstract-full').style.display = 'none'; document.getElementById('2407.01300v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024.
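<p class="is-size-7">The collaborative idea above can be read as completing a sparse model-by-task score matrix. Below is a minimal matrix-factorization sketch of that intuition in Python; the scores, latent dimension, and training loop are illustrative assumptions, not the CPP implementation.</p> <pre><code class="language-python">
import numpy as np

# Hypothetical score matrix: rows = models, columns = tasks, NaN = unseen combinations.
scores = np.array([[0.71, 0.62, np.nan],
                   [0.68, np.nan, 0.55],
                   [np.nan, 0.58, 0.49]])
observed = ~np.isnan(scores)

rng = np.random.default_rng(0)
k = 2                                                  # latent factor dimension (assumed)
U = 0.1 * rng.standard_normal((scores.shape[0], k))    # model embeddings
V = 0.1 * rng.standard_normal((scores.shape[1], k))    # task embeddings

lr, reg = 0.05, 0.01
for _ in range(2000):                                  # plain SGD over observed entries only
    for i, j in zip(*np.nonzero(observed)):
        err = scores[i, j] - U[i] @ V[j]
        U[i] += lr * (err * V[j] - reg * U[i])
        V[j] += lr * (err * U[i] - reg * V[j])

pred = U @ V.T   # the filled-in matrix predicts scores for unseen (model, task) pairs
print(np.round(pred, 3))
</code></pre>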
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">In Proceedings of EMNLP 2024 Main Track</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.02609">arXiv:2406.02609</a> <span> [<a href="https://arxiv.org/pdf/2406.02609">pdf</a>, <a href="https://arxiv.org/format/2406.02609">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Less is More: Pseudo-Label Filtering for Continual Test-Time Adaptation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tan%2C+J">Jiayao Tan</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+F">Fan Lyu</a>, <a href="/search/cs?searchtype=author&query=Ni%2C+C">Chenggong Ni</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+T">Tingliang Feng</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+F">Fuyuan Hu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhang Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+S">Shaochuang Zhao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+L">Liang Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.02609v2-abstract-short" style="display: inline;"> Continual Test-Time Adaptation (CTTA) aims to adapt a pre-trained model to a sequence of target domains during the test phase without accessing the source data. To adapt to unlabeled data from unknown domains, existing methods rely on constructing pseudo-labels for all samples and updating the model through self-training. However, these pseudo-labels often involve noise, leading to insufficient ad… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.02609v2-abstract-full').style.display = 'inline'; document.getElementById('2406.02609v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.02609v2-abstract-full" style="display: none;"> Continual Test-Time Adaptation (CTTA) aims to adapt a pre-trained model to a sequence of target domains during the test phase without accessing the source data. To adapt to unlabeled data from unknown domains, existing methods rely on constructing pseudo-labels for all samples and updating the model through self-training. However, these pseudo-labels often involve noise, leading to insufficient adaptation. To improve the quality of pseudo-labels, we propose a pseudo-label selection method for CTTA, called Pseudo Labeling Filter (PLF). The key idea of PLF is to keep selecting appropriate thresholds for pseudo-labels and identify reliable ones for self-training. Specifically, we present three principles for setting thresholds during continuous domain learning, including initialization, growth and diversity. Based on these principles, we design Self-Adaptive Thresholding to filter pseudo-labels. Additionally, we introduce a Class Prior Alignment (CPA) method to encourage the model to make diverse predictions for unknown domain samples. 
Through extensive experiments, PLF outperforms current state-of-the-art methods, proving its effectiveness in CTTA. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.02609v2-abstract-full').style.display = 'none'; document.getElementById('2406.02609v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: text overlap with arXiv:2310.03335 by other authors</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.17054">arXiv:2405.17054</a> <span> [<a href="https://arxiv.org/pdf/2405.17054">pdf</a>, <a href="https://arxiv.org/format/2405.17054">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Improving Data-aware and Parameter-aware Robustness for Continual Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xiao%2C+H">Hanxi Xiao</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+F">Fan Lyu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.17054v1-abstract-short" style="display: inline;"> The goal of the Continual Learning (CL) task is to continuously learn multiple new tasks sequentially while achieving a balance between the plasticity and stability of new and old knowledge. This paper shows that the insufficiency of this balance arises from the ineffective handling of outliers, leading to abnormal gradients and unexpected model updates. To address this issue, we enhance the data-aware and parame… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.17054v1-abstract-full').style.display = 'inline'; document.getElementById('2405.17054v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.17054v1-abstract-full" style="display: none;"> The goal of the Continual Learning (CL) task is to continuously learn multiple new tasks sequentially while achieving a balance between the plasticity and stability of new and old knowledge. This paper shows that the insufficiency of this balance arises from the ineffective handling of outliers, leading to abnormal gradients and unexpected model updates. To address this issue, we enhance the data-aware and parameter-aware robustness of CL, proposing a Robust Continual Learning (RCL) method. From the data perspective, we develop a contrastive loss based on the concepts of uniformity and alignment, forming a feature distribution that is more applicable to outliers. From the parameter perspective, we present a forward strategy for worst-case perturbation and apply robust gradient projection to the parameters.
The experimental results on three benchmarks show that the proposed method effectively maintains robustness and achieves new state-of-the-art (SOTA) results. The code is available at: https://github.com/HanxiXiao/RCL <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.17054v1-abstract-full').style.display = 'none'; document.getElementById('2405.17054v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.14602">arXiv:2405.14602</a> <span> [<a href="https://arxiv.org/pdf/2405.14602">pdf</a>, <a href="https://arxiv.org/format/2405.14602">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Controllable Continual Test-Time Adaptation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shi%2C+Z">Ziqi Shi</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+F">Fan Lyu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Ye Liu</a>, <a href="/search/cs?searchtype=author&query=Shang%2C+F">Fanhua Shang</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+F">Fuyuan Hu</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+W">Wei Feng</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhang Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+L">Liang Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.14602v3-abstract-short" style="display: inline;"> Continual Test-Time Adaptation (CTTA) is an emerging and challenging task where a model trained in a source domain must adapt to continuously changing conditions during testing, without access to the original source data. CTTA is prone to error accumulation due to uncontrollable domain shifts, leading to blurred decision boundaries between categories. Existing CTTA methods primarily focus on suppr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.14602v3-abstract-full').style.display = 'inline'; document.getElementById('2405.14602v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.14602v3-abstract-full" style="display: none;"> Continual Test-Time Adaptation (CTTA) is an emerging and challenging task where a model trained in a source domain must adapt to continuously changing conditions during testing, without access to the original source data. CTTA is prone to error accumulation due to uncontrollable domain shifts, leading to blurred decision boundaries between categories. Existing CTTA methods primarily focus on suppressing domain shifts, which proves inadequate during the unsupervised test phase. In contrast, we introduce a novel approach that guides rather than suppresses these shifts. 
Specifically, we propose $\textbf{C}$ontrollable $\textbf{Co}$ntinual $\textbf{T}$est-$\textbf{T}$ime $\textbf{A}$daptation (C-CoTTA), which explicitly prevents any single category from encroaching on others, thereby mitigating the mutual influence between categories caused by uncontrollable shifts. Moreover, our method reduces the sensitivity of the model to domain transformations, thereby minimizing the magnitude of category shifts. Extensive quantitative experiments demonstrate the effectiveness of our method, while qualitative analyses, such as t-SNE plots, confirm the theoretical validity of our approach. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.14602v3-abstract-full').style.display = 'none'; document.getElementById('2405.14602v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.09133">arXiv:2405.09133</a> <span> [<a href="https://arxiv.org/pdf/2405.09133">pdf</a>, <a href="https://arxiv.org/format/2405.09133">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Overcoming Domain Drift in Online Continual Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lyu%2C+F">Fan Lyu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+D">Daofeng Liu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+L">Linglan Zhao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhang Zhang</a>, <a href="/search/cs?searchtype=author&query=Shang%2C+F">Fanhua Shang</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+F">Fuyuan Hu</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+W">Wei Feng</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+L">Liang Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.09133v1-abstract-short" style="display: inline;"> Online Continual Learning (OCL) empowers machine learning models to acquire new knowledge online across a sequence of tasks. However, OCL faces a significant challenge: catastrophic forgetting, wherein the model learned in previous tasks is substantially overwritten upon encountering new tasks, leading to a biased forgetting of prior knowledge. Moreover, the continual domain drift in sequential lea… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.09133v1-abstract-full').style.display = 'inline'; document.getElementById('2405.09133v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.09133v1-abstract-full" style="display: none;"> Online Continual Learning (OCL) empowers machine learning models to acquire new knowledge online across a sequence of tasks.
However, OCL faces a significant challenge: catastrophic forgetting, wherein the model learned in previous tasks is substantially overwritten upon encountering new tasks, leading to a biased forgetting of prior knowledge. Moreover, the continual domain drift in sequential learning tasks may entail the gradual displacement of the decision boundaries in the learned feature space, rendering the learned knowledge susceptible to forgetting. To address the above problem, in this paper, we propose a novel rehearsal strategy, termed Drift-Reducing Rehearsal (DRR), to anchor the domain of old tasks and reduce the negative transfer effects. First, we propose to select more representative samples for memory, guided by constructed centroids in a data stream. Then, to keep the model from domain chaos during drifting, a two-level angular cross-task Contrastive Margin Loss (CML) is proposed to encourage intra-class and intra-task compactness and increase the inter-class and inter-task discrepancy. Finally, to further suppress the continual domain drift, we present an optional Centroid Distillation Loss (CDL) on the rehearsal memory to anchor the knowledge in feature space for each previous old task. Extensive experimental results on four benchmark datasets validate that the proposed DRR can effectively mitigate the continual domain drift and achieve state-of-the-art (SOTA) performance in OCL. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.09133v1-abstract-full').style.display = 'none'; document.getElementById('2405.09133v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.07200">arXiv:2404.07200</a> <span> [<a href="https://arxiv.org/pdf/2404.07200">pdf</a>, <a href="https://arxiv.org/format/2404.07200">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Toward a Better Understanding of Fourier Neural Operators from a Spectral Perspective </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qin%2C+S">Shaoxiang Qin</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+F">Fuyuan Lyu</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+W">Wenhui Peng</a>, <a href="/search/cs?searchtype=author&query=Geng%2C+D">Dingyang Geng</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Ju Wang</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+X">Xing Tang</a>, <a href="/search/cs?searchtype=author&query=Leroyer%2C+S">Sylvie Leroyer</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+N">Naiping Gao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xue Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+L+L">Liangzhu Leon Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.07200v2-abstract-short" style="display: inline;"> In solving partial differential equations (PDEs), Fourier Neural Operators (FNOs) have exhibited notable effectiveness.
However, FNO is observed to be ineffective with large Fourier kernels that parameterize more frequencies. Current solutions rely on setting small kernels, restricting FNO's ability to capture complex PDE data in real-world applications. This paper offers empirical insights into F… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.07200v2-abstract-full').style.display = 'inline'; document.getElementById('2404.07200v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.07200v2-abstract-full" style="display: none;"> In solving partial differential equations (PDEs), Fourier Neural Operators (FNOs) have exhibited notable effectiveness. However, FNO is observed to be ineffective with large Fourier kernels that parameterize more frequencies. Current solutions rely on setting small kernels, restricting FNO's ability to capture complex PDE data in real-world applications. This paper offers empirical insights into FNO's difficulty with large kernels through spectral analysis: FNO exhibits a unique Fourier parameterization bias, excelling at learning dominant frequencies in target data while struggling with non-dominant frequencies. To mitigate such a bias, we propose SpecB-FNO to enhance the capture of non-dominant frequencies by adopting additional residual modules to learn from the previous ones' prediction residuals iteratively. By effectively utilizing large Fourier kernels, SpecB-FNO achieves better prediction accuracy on diverse PDE applications, with an average improvement of 50%. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.07200v2-abstract-full').style.display = 'none'; document.getElementById('2404.07200v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. 
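<p class="is-size-7">The residual idea in SpecB-FNO, where later modules fit what earlier ones missed, can be sketched without an actual FNO. The toy Python example below fits a signal in two stages with plain least squares on Fourier features; the signal, mode counts, and stage schedule are assumptions for illustration only, not the paper's model.</p> <pre><code class="language-python">
import numpy as np

rng = np.random.default_rng(0)
x = np.linspace(0.0, 2.0 * np.pi, 256, endpoint=False)
y = np.sin(x) + 0.2 * np.sin(8.0 * x) + 0.05 * rng.standard_normal(x.size)  # dominant + weak frequency

def fourier_features(x, k):
    # sin/cos features for modes 1..k
    return np.column_stack([f(m * x) for m in range(1, k + 1) for f in (np.sin, np.cos)])

prediction = np.zeros_like(y)   # combined two-stage prediction
residual = y.copy()
for stage, k in enumerate([2, 16], start=1):  # stage 1: few modes; stage 2: more modes fit the residual
    phi = fourier_features(x, k)
    coef, *_ = np.linalg.lstsq(phi, residual, rcond=None)
    stage_pred = phi @ coef
    prediction += stage_pred
    residual -= stage_pred
    print(f"stage {stage}: residual RMS = {np.sqrt(np.mean(residual ** 2)):.4f}")
</code></pre>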
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.17442">arXiv:2403.17442</a> <span> [<a href="https://arxiv.org/pdf/2403.17442">pdf</a>, <a href="https://arxiv.org/format/2403.17442">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Touch the Core: Exploring Task Dependence Among Hybrid Targets for Recommendation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tang%2C+X">Xing Tang</a>, <a href="/search/cs?searchtype=author&query=Qiao%2C+Y">Yang Qiao</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+F">Fuyuan Lyu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+D">Dugang Liu</a>, <a href="/search/cs?searchtype=author&query=He%2C+X">Xiuqiang He</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.17442v2-abstract-short" style="display: inline;"> As user behaviors become complicated on business platforms, online recommendations focus more on how to touch the core conversions, which are highly related to the interests of platforms. These core conversions are usually continuous targets, such as \textit{watch time}, \textit{revenue}, and so on, whose predictions can be enhanced by previous discrete conversion actions. Therefore, multi-task le… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.17442v2-abstract-full').style.display = 'inline'; document.getElementById('2403.17442v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.17442v2-abstract-full" style="display: none;"> As user behaviors become complicated on business platforms, online recommendations focus more on how to touch the core conversions, which are highly related to the interests of platforms. These core conversions are usually continuous targets, such as \textit{watch time}, \textit{revenue}, and so on, whose predictions can be enhanced by previous discrete conversion actions. Therefore, multi-task learning (MTL) can be adopted as the paradigm to learn these hybrid targets. However, existing works mainly emphasize investigating the sequential dependence among discrete conversion actions, which neglects the complexity of dependence between discrete conversions and the final continuous conversion. Moreover, simultaneously optimizing hybrid tasks with stronger task dependence will suffer from volatile issues where the core regression task might have a larger influence on other tasks. In this paper, we study the MTL problem with hybrid targets for the first time and propose the model named Hybrid Targets Learning Network (HTLNet) to explore task dependence and enhance optimization. Specifically, we introduce label embedding for each task to explicitly transfer the label information among these tasks, which can effectively explore logical task dependence. We also further design the gradient adjustment regime between the final regression task and other classification tasks to enhance the optimization. Extensive experiments on two offline public datasets and one real-world industrial dataset are conducted to validate the effectiveness of HTLNet. 
Moreover, online A/B tests on the financial recommender system also show that our model has improved significantly. Our implementation is available here\footnote{\url{https://github.com/fuyuanlyu/HTLNet}}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.17442v2-abstract-full').style.display = 'none'; document.getElementById('2403.17442v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by RecSys 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.12559">arXiv:2403.12559</a> <span> [<a href="https://arxiv.org/pdf/2403.12559">pdf</a>, <a href="https://arxiv.org/format/2403.12559">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Confidence Self-Calibration for Multi-Label Class-Incremental Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Du%2C+K">Kaile Du</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Y">Yifan Zhou</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+F">Fan Lyu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yuyang Li</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+C">Chen Lu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Guangcan Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.12559v2-abstract-short" style="display: inline;"> The partial label challenge in Multi-Label Class-Incremental Learning (MLCIL) arises when only the new classes are labeled during training, while past and future labels remain unavailable. This issue leads to a proliferation of false-positive errors due to erroneously high confidence multi-label predictions, exacerbating catastrophic forgetting within the disjoint label space. In this paper, we ai… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.12559v2-abstract-full').style.display = 'inline'; document.getElementById('2403.12559v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.12559v2-abstract-full" style="display: none;"> The partial label challenge in Multi-Label Class-Incremental Learning (MLCIL) arises when only the new classes are labeled during training, while past and future labels remain unavailable. This issue leads to a proliferation of false-positive errors due to erroneously high confidence multi-label predictions, exacerbating catastrophic forgetting within the disjoint label space. 
In this paper, we aim to refine multi-label confidence calibration in MLCIL and propose a Confidence Self-Calibration (CSC) approach. Firstly, for label relationship calibration, we introduce a class-incremental graph convolutional network that bridges the isolated label spaces by constructing a learnable, dynamically extended label relationship graph. Then, for confidence calibration, we present a max-entropy regularization for each multi-label increment, facilitating confidence self-calibration through the penalization of over-confident output distributions. Our approach attains new state-of-the-art results in MLCIL tasks on both MS-COCO and PASCAL VOC datasets, with the calibration of label confidences confirmed through our methodology. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.12559v2-abstract-full').style.display = 'none'; document.getElementById('2403.12559v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at the European Conference on Computer Vision (ECCV) 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.18609">arXiv:2402.18609</a> <span> [<a href="https://arxiv.org/pdf/2402.18609">pdf</a>, <a href="https://arxiv.org/format/2402.18609">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> ICE-SEARCH: A Language Model-Driven Feature Selection Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yang%2C+T">Tianze Yang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+T">Tianyi Yang</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+F">Fuyuan Lyu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+S">Shaoshan Liu</a>, <a href="/search/cs?searchtype=author&query=Xue"> Xue</a>, <a href="/search/cs?searchtype=author&query=Liu"> Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.18609v4-abstract-short" style="display: inline;"> This study unveils the In-Context Evolutionary Search (ICE-SEARCH) method, which is among the first works that meld large language models (LLMs) with evolutionary algorithms for feature selection (FS) tasks and demonstrates its effectiveness in Medical Predictive Analytics (MPA) applications.
ICE-SEARCH harnesses the crossover and mutation capabilities inherent in LLMs within an evolutionary fram… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.18609v4-abstract-full').style.display = 'inline'; document.getElementById('2402.18609v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.18609v4-abstract-full" style="display: none;"> This study unveils the In-Context Evolutionary Search (ICE-SEARCH) method, which is among the first works that meld large language models (LLMs) with evolutionary algorithms for feature selection (FS) tasks and demonstrates its effectiveness in Medical Predictive Analytics (MPA) applications. ICE-SEARCH harnesses the crossover and mutation capabilities inherent in LLMs within an evolutionary framework, significantly improving FS through the model's comprehensive world knowledge and its adaptability to a variety of roles. Our evaluation of this methodology spans three crucial MPA tasks: stroke, cardiovascular disease, and diabetes, where ICE-SEARCH outperforms traditional FS methods in pinpointing essential features for medical applications. ICE-SEARCH achieves State-of-the-Art (SOTA) performance in stroke prediction and diabetes prediction; the Decision-Randomized ICE-SEARCH ranks as SOTA in cardiovascular disease prediction. The study emphasizes the critical role of incorporating domain-specific insights, illustrating ICE-SEARCH's robustness, generalizability, and convergence. This opens avenues for further research into comprehensive and intricate FS landscapes, marking a significant stride in the application of artificial intelligence in medical predictive analytics. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.18609v4-abstract-full').style.display = 'none'; document.getElementById('2402.18609v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024.
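<p class="is-size-7">A rough sketch of the evolutionary feature-selection loop described above. The LLM-driven crossover/mutation step is stubbed out with a random proposal operator, and the data, fitness function, and population sizes are invented for illustration; none of this is the ICE-SEARCH code.</p> <pre><code class="language-python">
import numpy as np

rng = np.random.default_rng(0)
n_samples, n_features = 200, 12
X = rng.standard_normal((n_samples, n_features))
y = X[:, 0] - 2 * X[:, 3] + 0.1 * rng.standard_normal(n_samples)   # only features 0 and 3 matter

def fitness(mask):
    # Score a feature subset by least-squares fit quality (stand-in for a downstream model).
    if not mask.any():
        return -np.inf
    Xs = X[:, mask]
    coef, *_ = np.linalg.lstsq(Xs, y, rcond=None)
    resid = y - Xs @ coef
    return -np.mean(resid ** 2) - 0.01 * mask.sum()    # penalize large subsets

def propose(parent_a, parent_b):
    # Placeholder for the LLM-driven crossover/mutation: here a uniform crossover plus one bit flip.
    child = np.where(rng.random(n_features) < 0.5, parent_a, parent_b)
    flip = rng.integers(n_features)
    child[flip] = ~child[flip]
    return child

population = rng.random((8, n_features)) < 0.5         # random initial feature masks
for _ in range(30):
    scores = np.array([fitness(m) for m in population])
    parents = population[np.argsort(scores)[-4:]]      # keep the best half
    children = [propose(parents[rng.integers(4)], parents[rng.integers(4)]) for _ in range(4)]
    population = np.vstack([parents, children])

best = population[np.argmax([fitness(m) for m in population])]
print("selected features:", np.flatnonzero(best))
</code></pre>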
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.08182">arXiv:2402.08182</a> <span> [<a href="https://arxiv.org/pdf/2402.08182">pdf</a>, <a href="https://arxiv.org/format/2402.08182">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Variational Continual Test-Time Adaptation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lyu%2C+F">Fan Lyu</a>, <a href="/search/cs?searchtype=author&query=Du%2C+K">Kaile Du</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yuyang Li</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+H">Hanyu Zhao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhang Zhang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Guangcan Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+L">Liang Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.08182v1-abstract-short" style="display: inline;"> Prior drift is a crucial problem in Continual Test-Time Adaptation (CTTA) methods that only use unlabeled test data, as it can cause significant error propagation. In this paper, we introduce VCoTTA, a variational Bayesian approach to measure uncertainties in CTTA. At the source stage, we transform a pre-trained deterministic model into a Bayesian Neural Network (BNN) via a variational warm-up strategy,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.08182v1-abstract-full').style.display = 'inline'; document.getElementById('2402.08182v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.08182v1-abstract-full" style="display: none;"> Prior drift is a crucial problem in Continual Test-Time Adaptation (CTTA) methods that only use unlabeled test data, as it can cause significant error propagation. In this paper, we introduce VCoTTA, a variational Bayesian approach to measure uncertainties in CTTA. At the source stage, we transform a pre-trained deterministic model into a Bayesian Neural Network (BNN) via a variational warm-up strategy, injecting uncertainties into the model. At test time, we employ a mean-teacher update strategy using variational inference for the student model and exponential moving average for the teacher model. Our novel approach updates the student model by combining priors from both the source and teacher models. The evidence lower bound is formulated as the cross-entropy between the student and teacher models, along with the Kullback-Leibler (KL) divergence of the prior mixture. Experimental results on three datasets demonstrate the method's effectiveness in mitigating prior drift within the CTTA framework.
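<p class="is-size-7">For the mean-teacher part of the update described above, a bare-bones Python sketch follows. It shows only the student consistency step and the exponential-moving-average teacher update; the variational warm-up, the source/teacher prior mixture, and the KL term are omitted, and the model, batch, and momentum values are assumptions rather than the VCoTTA implementation.</p> <pre><code class="language-python">
import copy
import torch
import torch.nn.functional as F

# Student adapts on unlabeled test batches; teacher follows by exponential moving average.
student = torch.nn.Linear(16, 4)            # stand-in for the pre-trained classifier
teacher = copy.deepcopy(student)
optimizer = torch.optim.SGD(student.parameters(), lr=1e-3)

def ema_update(teacher, student, momentum=0.999):
    with torch.no_grad():
        for t, s in zip(teacher.parameters(), student.parameters()):
            t.mul_(momentum).add_(s, alpha=1.0 - momentum)

x = torch.randn(8, 16)                                # one unlabeled test batch
with torch.no_grad():
    teacher_prob = teacher(x).softmax(dim=-1)         # teacher's soft pseudo-labels
loss = F.cross_entropy(student(x), teacher_prob)      # consistency term only (no variational/KL parts)
optimizer.zero_grad()
loss.backward()
optimizer.step()
ema_update(teacher, student)
</code></pre>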
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.08182v1-abstract-full').style.display = 'none'; document.getElementById('2402.08182v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.01054">arXiv:2401.01054</a> <span> [<a href="https://arxiv.org/pdf/2401.01054">pdf</a>, <a href="https://arxiv.org/format/2401.01054">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Elastic Multi-Gradient Descent for Parallel Continual Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lyu%2C+F">Fan Lyu</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+W">Wei Feng</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yuepan Li</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Q">Qing Sun</a>, <a href="/search/cs?searchtype=author&query=Shang%2C+F">Fanhua Shang</a>, <a href="/search/cs?searchtype=author&query=Wan%2C+L">Liang Wan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+L">Liang Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.01054v1-abstract-short" style="display: inline;"> The goal of Continual Learning (CL) is to continuously learn from new data streams and accomplish the corresponding tasks. Previously studied CL assumes that data are given in sequence nose-to-tail for different tasks, thus indeed belonging to Serial Continual Learning (SCL). This paper studies the novel paradigm of Parallel Continual Learning (PCL) in dynamic multi-task scenarios, where a diverse… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.01054v1-abstract-full').style.display = 'inline'; document.getElementById('2401.01054v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.01054v1-abstract-full" style="display: none;"> The goal of Continual Learning (CL) is to continuously learn from new data streams and accomplish the corresponding tasks. Previously studied CL assumes that data are given in sequence nose-to-tail for different tasks, thus indeed belonging to Serial Continual Learning (SCL). This paper studies the novel paradigm of Parallel Continual Learning (PCL) in dynamic multi-task scenarios, where a diverse set of tasks is encountered at different time points. PCL presents challenges due to the training of an unspecified number of tasks with varying learning progress, leading to the difficulty of guaranteeing effective model updates for all encountered tasks. In our previous conference work, we focused on measuring and reducing the discrepancy among gradients in a multi-objective optimization problem, which, however, may still contain negative transfers in every model update. 
To address this issue, in the dynamic multi-objective optimization problem, we introduce task-specific elastic factors to adjust the descent direction towards the Pareto front. The proposed method, called Elastic Multi-Gradient Descent (EMGD), ensures that each update follows an appropriate Pareto descent direction, minimizing any negative impact on previously learned tasks. To balance the training between old and new tasks, we also propose a memory editing mechanism guided by the gradient computed using EMGD. This editing process updates the stored data points, reducing interference in the Pareto descent direction from previous tasks. Experiments on public datasets validate the effectiveness of our EMGD in the PCL setting. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.01054v1-abstract-full').style.display = 'none'; document.getElementById('2401.01054v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to IEEE TPAMI</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.03526">arXiv:2311.03526</a> <span> [<a href="https://arxiv.org/pdf/2311.03526">pdf</a>, <a href="https://arxiv.org/format/2311.03526">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Towards Automated Negative Sampling in Implicit Recommendation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lyu%2C+F">Fuyuan Lyu</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+Y">Yaochen Hu</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+X">Xing Tang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yingxue Zhang</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+R">Ruiming Tang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xue Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.03526v1-abstract-short" style="display: inline;"> Negative sampling methods are vital in implicit recommendation models as they allow us to obtain negative instances from massive unlabeled data. Most existing approaches focus on sampling hard negative samples in various ways. These studies are orthogonal to the recommendation model and implicit datasets. However, such an idea contradicts the common belief in AutoML that the model and dataset shou… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.03526v1-abstract-full').style.display = 'inline'; document.getElementById('2311.03526v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.03526v1-abstract-full" style="display: none;"> Negative sampling methods are vital in implicit recommendation models as they allow us to obtain negative instances from massive unlabeled data.
Most existing approaches focus on sampling hard negative samples in various ways. These studies are orthogonal to the recommendation model and implicit datasets. However, such an idea contradicts the common belief in AutoML that the model and dataset should be matched. Empirical experiments suggest that the best-performing negative sampler depends on the implicit dataset and the specific recommendation model. Hence, we propose a hypothesis that the negative sampler should align with the capacity of the recommendation models as well as the statistics of the datasets to achieve optimal performance. A mismatch between these three would result in sub-optimal outcomes. An intuitive idea to address the mismatch problem is to exhaustively select the best-performing negative sampler given the model and dataset. However, such an approach is computationally expensive and time-consuming, leaving the problem unsolved. In this work, we propose the AutoSample framework that adaptively selects the best-performing negative sampler among candidates. Specifically, we propose a loss-to-instance approximation to transform the negative sampler search task into a learning task over a weighted sum, enabling end-to-end training of the model. We also design an adaptive search algorithm to extensively and efficiently explore the search space. A specific initialization approach is also introduced to better utilize the obtained model parameters during the search stage, which is similar to curriculum learning and leads to better performance and lower computational resource consumption. We evaluate the proposed framework on four benchmarks over three models. Extensive experiments demonstrate the effectiveness and efficiency of our proposed framework. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.03526v1-abstract-full').style.display = 'none'; document.getElementById('2311.03526v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023.
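<p class="is-size-7">The "weighted sum over candidate samplers, trained end-to-end" idea can be sketched as below. The three candidate samplers, the BPR loss, and all sizes are made up for illustration and differ from the actual AutoSample search space and schedule.</p> <pre><code class="language-python">
import torch
import torch.nn.functional as F

torch.manual_seed(0)
n_users, n_items, dim = 50, 200, 16
user_emb = torch.nn.Embedding(n_users, dim)
item_emb = torch.nn.Embedding(n_items, dim)
alpha = torch.nn.Parameter(torch.zeros(3))             # one learnable weight per candidate sampler

def bpr_loss(users, pos, neg):
    score = lambda u, i: (user_emb(u) * item_emb(i)).sum(-1)
    return -F.logsigmoid(score(users, pos) - score(users, neg)).mean()

params = list(user_emb.parameters()) + list(item_emb.parameters()) + [alpha]
optimizer = torch.optim.Adam(params, lr=1e-2)

users = torch.randint(0, n_users, (32,))
pos = torch.randint(0, n_items, (32,))

# Three toy "candidate samplers": uniform, low-id-biased, and high-id-biased negatives.
candidates = [
    torch.randint(0, n_items, (32,)),
    torch.randint(0, n_items // 4, (32,)),
    torch.randint(3 * n_items // 4, n_items, (32,)),
]
losses = torch.stack([bpr_loss(users, pos, neg) for neg in candidates])
total = (torch.softmax(alpha, dim=0) * losses).sum()   # weighted sum; the weights are trained jointly
optimizer.zero_grad()
total.backward()
optimizer.step()
</code></pre>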
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.20490">arXiv:2310.20490</a> <span> [<a href="https://arxiv.org/pdf/2310.20490">pdf</a>, <a href="https://arxiv.org/format/2310.20490">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Long-Tailed Learning as Multi-Objective Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+W">Weiqi Li</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+F">Fan Lyu</a>, <a href="/search/cs?searchtype=author&query=Shang%2C+F">Fanhua Shang</a>, <a href="/search/cs?searchtype=author&query=Wan%2C+L">Liang Wan</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+W">Wei Feng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.20490v2-abstract-short" style="display: inline;"> Real-world data is extremely imbalanced and presents a long-tailed distribution, resulting in models that are biased towards classes with sufficient samples and perform poorly on rare classes. Recent methods propose to rebalance classes but they suffer from the seesaw dilemma (increasing performance on tail classes may decrease that of head classes, and vice versa). In this paper, we argue t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.20490v2-abstract-full').style.display = 'inline'; document.getElementById('2310.20490v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.20490v2-abstract-full" style="display: none;"> Real-world data is extremely imbalanced and presents a long-tailed distribution, resulting in models that are biased towards classes with sufficient samples and perform poorly on rare classes. Recent methods propose to rebalance classes but they suffer from the seesaw dilemma (increasing performance on tail classes may decrease that of head classes, and vice versa). In this paper, we argue that the seesaw dilemma is derived from the gradient imbalance of different classes, in which gradients of inappropriate classes are treated as important for updating and are thus prone to overcompensation or undercompensation on tail classes. To achieve ideal compensation, we formulate long-tailed recognition as a multi-objective optimization problem, which fairly respects the contributions of head and tail classes simultaneously. For efficiency, we propose a Gradient-Balancing Grouping (GBG) strategy to gather the classes with similar gradient directions, thus approximately making every update follow a Pareto descent direction. Our GBG method drives classes with similar gradient directions to form a more representative gradient and provide ideal compensation to the tail classes. Moreover, we conduct extensive experiments on commonly used benchmarks in long-tailed learning and demonstrate the superiority of our method over existing SOTA methods.
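<p class="is-size-7">A toy sketch of grouping classes by gradient direction, in the spirit of the GBG strategy above: classes whose (stand-in) gradient vectors point the same way are merged and represented by their mean gradient. The random gradients, greedy rule, and threshold are assumptions, not the paper's procedure.</p> <pre><code class="language-python">
import numpy as np

rng = np.random.default_rng(0)
n_classes, dim = 10, 64
class_grads = rng.standard_normal((n_classes, dim))    # stand-in per-class gradient vectors

def cosine(a, b):
    return float(a @ b / (np.linalg.norm(a) * np.linalg.norm(b)))

# Greedy grouping: a class joins the first group whose running mean gradient points the same way.
threshold = 0.0                                         # assumed similarity cutoff
groups = []                                             # each group is a list of class indices
for c in range(n_classes):
    for group in groups:
        mean_grad = class_grads[group].mean(axis=0)
        if cosine(class_grads[c], mean_grad) > threshold:
            group.append(c)
            break
    else:
        groups.append([c])

group_grads = [class_grads[g].mean(axis=0) for g in groups]  # one representative gradient per group
print("groups:", groups)
</code></pre>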
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.20490v2-abstract-full').style.display = 'none'; document.getElementById('2310.20490v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">In submission</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.20268">arXiv:2310.20268</a> <span> [<a href="https://arxiv.org/pdf/2310.20268">pdf</a>, <a href="https://arxiv.org/format/2310.20268">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Constructing Sample-to-Class Graph for Few-Shot Class-Incremental Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hu%2C+F">Fuyuan Hu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jian Zhang</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+F">Fan Lyu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+L">Linyan Li</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+F">Fenglei Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.20268v1-abstract-short" style="display: inline;"> Few-shot class-incremental learning (FSCIL) aims to build machine learning model that can continually learn new concepts from a few data samples, without forgetting knowledge of old classes. The challenges of FSCIL lies in the limited data of new classes, which not only lead to significant overfitting issues but also exacerbates the notorious catastrophic forgetting problems. As proved in early… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.20268v1-abstract-full').style.display = 'inline'; document.getElementById('2310.20268v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.20268v1-abstract-full" style="display: none;"> Few-shot class-incremental learning (FSCIL) aims to build machine learning model that can continually learn new concepts from a few data samples, without forgetting knowledge of old classes. The challenges of FSCIL lies in the limited data of new classes, which not only lead to significant overfitting issues but also exacerbates the notorious catastrophic forgetting problems. As proved in early studies, building sample relationships is beneficial for learning from few-shot samples. In this paper, we promote the idea to the incremental scenario, and propose a Sample-to-Class (S2C) graph learning method for FSCIL. Specifically, we propose a Sample-level Graph Network (SGN) that focuses on analyzing sample relationships within a single session. 
This network helps aggregate similar samples, ultimately leading to the extraction of more refined class-level features. Then, we present a Class-level Graph Network (CGN) that establishes connections across class-level features of both new and old classes. This network plays a crucial role in linking the knowledge between different sessions and helps improve overall learning in the FSCIL scenario. Moreover, we design a multi-stage strategy for training S2C model, which mitigates the training challenges posed by limited data in the incremental process. The multi-stage training strategy is designed to build S2C graph from base to few-shot stages, and improve the capacity via an extra pseudo-incremental stage. Experiments on three popular benchmark datasets show that our method clearly outperforms the baselines and sets new state-of-the-art results in FSCIL. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.20268v1-abstract-full').style.display = 'none'; document.getElementById('2310.20268v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.19113">arXiv:2310.19113</a> <span> [<a href="https://arxiv.org/pdf/2310.19113">pdf</a>, <a href="https://arxiv.org/format/2310.19113">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Dynamic V2X Autonomous Perception from Road-to-Vehicle Vision </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tan%2C+J">Jiayao Tan</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+F">Fan Lyu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+L">Linyan Li</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+F">Fuyuan Hu</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+T">Tingliang Feng</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+F">Fenglei Xu</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+R">Rui Yao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.19113v1-abstract-short" style="display: inline;"> Vehicle-to-everything (V2X) perception is an innovative technology that enhances vehicle perception accuracy, thereby elevating the security and reliability of autonomous systems. However, existing V2X perception methods focus on static scenes from mainly vehicle-based vision, which is constrained by sensor capabilities and communication loads. 
To adapt V2X perception models to dynamic scenes, we… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.19113v1-abstract-full').style.display = 'inline'; document.getElementById('2310.19113v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.19113v1-abstract-full" style="display: none;"> Vehicle-to-everything (V2X) perception is an innovative technology that enhances vehicle perception accuracy, thereby elevating the security and reliability of autonomous systems. However, existing V2X perception methods focus on static scenes from mainly vehicle-based vision, which is constrained by sensor capabilities and communication loads. To adapt V2X perception models to dynamic scenes, we propose to build V2X perception from road-to-vehicle vision and present the Adaptive Road-to-Vehicle Perception (AR2VP) method. In AR2VP, we leverage roadside units to offer stable, wide-range sensing capabilities and serve as communication hubs. AR2VP is devised to tackle both intra-scene and inter-scene changes. For the former, we construct a dynamic perception representing module, which efficiently integrates vehicle perceptions, enabling vehicles to capture a more comprehensive range of dynamic factors within the scene. Moreover, we introduce a road-to-vehicle perception compensating module, aimed at preserving the maximized roadside unit perception information in the presence of intra-scene changes. For inter-scene changes, we implement an experience replay mechanism leveraging the roadside unit's storage capacity to retain a subset of historical scene data, maintaining model robustness in response to inter-scene shifts. We conduct perception experiments on 3D object detection and segmentation, and the results show that AR2VP excels in both performance-bandwidth trade-offs and adaptability within dynamic environments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.19113v1-abstract-full').style.display = 'none'; document.getElementById('2310.19113v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.15342">arXiv:2310.15342</a> <span> [<a href="https://arxiv.org/pdf/2310.15342">pdf</a>, <a href="https://arxiv.org/format/2310.15342">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Towards Hybrid-grained Feature Interaction Selection for Deep Sparse Network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lyu%2C+F">Fuyuan Lyu</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+X">Xing Tang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+D">Dugang Liu</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+C">Chen Ma</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+W">Weihong Luo</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+L">Liang Chen</a>, <a href="/search/cs?searchtype=author&query=He%2C+X">Xiuqiang He</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xue Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.15342v2-abstract-short" style="display: inline;"> Deep sparse networks are widely investigated as a neural network architecture for prediction tasks with high-dimensional sparse features, with which feature interaction selection is a critical component. While previous methods primarily focus on how to search feature interaction in a coarse-grained space, less attention has been given to a finer granularity. In this work, we introduce a hybrid-gra… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.15342v2-abstract-full').style.display = 'inline'; document.getElementById('2310.15342v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.15342v2-abstract-full" style="display: none;"> Deep sparse networks are widely investigated as a neural network architecture for prediction tasks with high-dimensional sparse features, with which feature interaction selection is a critical component. While previous methods primarily focus on how to search feature interaction in a coarse-grained space, less attention has been given to a finer granularity. In this work, we introduce a hybrid-grained feature interaction selection approach that targets both feature field and feature value for deep sparse networks. To explore such expansive space, we propose a decomposed space which is calculated on the fly. We then develop a selection algorithm called OptFeature, which efficiently selects the feature interaction from both the feature field and the feature value simultaneously. Results from experiments on three large real-world benchmark datasets demonstrate that OptFeature performs well in terms of accuracy and efficiency. Additional studies support the feasibility of our method. 
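<p class="is-size-7">The selection step summarized above can be pictured, very loosely, as gating candidate interactions at two granularities at once. The sketch below is a generic illustration with assumed gate parameters and an arbitrary keep-threshold; it is not the OptFeature algorithm or its decomposed search space.</p> <pre><code class="language-python">
# Generic field-level plus value-level interaction gating (illustrative only, not OptFeature itself).
import numpy as np
from itertools import combinations

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def interaction_scores(sample, field_gate, value_gate):
    """sample[i] is the value id observed in field i; each pair (i, j) is scored by the product of
    its field-level gate and the two value-level gates of the observed values."""
    scores = {}
    for i, j in combinations(range(len(sample)), 2):
        scores[(i, j)] = (sigmoid(field_gate[i, j])
                          * sigmoid(value_gate[i][sample[i]])
                          * sigmoid(value_gate[j][sample[j]]))
    return scores

# Toy setup: 3 fields with 4 possible values each; the gates would normally be learned.
rng = np.random.default_rng(1)
field_gate = rng.normal(size=(3, 3))
value_gate = [rng.normal(size=4) for _ in range(3)]
scores = interaction_scores([0, 2, 1], field_gate, value_gate)
kept = [pair for pair, s in scores.items() if s > 0.15]  # keep-threshold is an arbitrary choice here
print(scores, kept)
</code></pre>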
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.15342v2-abstract-full').style.display = 'none'; document.getElementById('2310.15342v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2023 poster</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.13382">arXiv:2306.13382</a> <span> [<a href="https://arxiv.org/pdf/2306.13382">pdf</a>, <a href="https://arxiv.org/format/2306.13382">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> OptMSM: Optimizing Multi-Scenario Modeling for Click-Through Rate Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tang%2C+X">Xing Tang</a>, <a href="/search/cs?searchtype=author&query=Qiao%2C+Y">Yang Qiao</a>, <a href="/search/cs?searchtype=author&query=Fu%2C+Y">Yuwen Fu</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+F">Fuyuan Lyu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+D">Dugang Liu</a>, <a href="/search/cs?searchtype=author&query=He%2C+X">Xiuqiang He</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.13382v1-abstract-short" style="display: inline;"> A large-scale industrial recommendation platform typically consists of multiple associated scenarios, requiring a unified click-through rate (CTR) prediction model to serve them simultaneously. Existing approaches for multi-scenario CTR prediction generally consist of two main modules: i) a scenario-aware learning module that learns a set of multi-functional representations with scenario-shared an… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.13382v1-abstract-full').style.display = 'inline'; document.getElementById('2306.13382v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.13382v1-abstract-full" style="display: none;"> A large-scale industrial recommendation platform typically consists of multiple associated scenarios, requiring a unified click-through rate (CTR) prediction model to serve them simultaneously. Existing approaches for multi-scenario CTR prediction generally consist of two main modules: i) a scenario-aware learning module that learns a set of multi-functional representations with scenario-shared and scenario-specific information from input features, and ii) a scenario-specific prediction module that serves each scenario based on these representations. However, most of these approaches primarily focus on improving the former module and neglect the latter module. 
This can result in challenges such as increased model parameter size, training difficulty, and performance bottlenecks for each scenario. To address these issues, we propose a novel framework called OptMSM (\textbf{Opt}imizing \textbf{M}ulti-\textbf{S}cenario \textbf{M}odeling). First, we introduce a simplified yet effective scenario-enhanced learning module to alleviate the aforementioned challenges. Specifically, we partition the input features into scenario-specific and scenario-shared features, which are mapped to specific information embedding encodings and a set of shared information embeddings, respectively. By imposing an orthogonality constraint on the shared information embeddings to facilitate the disentanglement of shared information corresponding to each scenario, we combine them with the specific information embeddings to obtain multi-functional representations. Second, we introduce a scenario-specific hypernetwork in the scenario-specific prediction module to capture interactions within each scenario more effectively, thereby alleviating the performance bottlenecks. Finally, we conduct extensive offline experiments and an online A/B test to demonstrate the effectiveness of OptMSM. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.13382v1-abstract-full').style.display = 'none'; document.getElementById('2306.13382v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ECML-PKDD 2023 Applied Data Science Track</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.00315">arXiv:2306.00315</a> <span> [<a href="https://arxiv.org/pdf/2306.00315">pdf</a>, <a href="https://arxiv.org/format/2306.00315">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Explicit Feature Interaction-aware Uplift Network for Online Marketing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+D">Dugang Liu</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+X">Xing Tang</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+H">Han Gao</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+F">Fuyuan Lyu</a>, <a href="/search/cs?searchtype=author&query=He%2C+X">Xiuqiang He</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.00315v1-abstract-short" style="display: inline;"> As a key component in online marketing, uplift modeling aims to accurately capture the degree to which different treatments motivate different users, such as coupons or discounts, also known as the estimation of individual treatment effect (ITE). 
In an actual business scenario, the options for treatment may be numerous and complex, and there may be correlations between different treatments. In add… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.00315v1-abstract-full').style.display = 'inline'; document.getElementById('2306.00315v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.00315v1-abstract-full" style="display: none;"> As a key component in online marketing, uplift modeling aims to accurately capture the degree to which different treatments motivate different users, such as coupons or discounts, also known as the estimation of individual treatment effect (ITE). In an actual business scenario, the options for treatment may be numerous and complex, and there may be correlations between different treatments. In addition, each marketing instance may also have rich user and contextual features. However, existing methods still fall short in both fully exploiting treatment information and mining features that are sensitive to a particular treatment. In this paper, we propose an explicit feature interaction-aware uplift network (EFIN) to address these two problems. Our EFIN includes four customized modules: 1) a feature encoding module encodes not only the user and contextual features, but also the treatment features; 2) a self-interaction module aims to accurately model the user's natural response with all but the treatment features; 3) a treatment-aware interaction module accurately models the degree to which a particular treatment motivates a user through interactions between the treatment features and other features, i.e., ITE; and 4) an intervention constraint module is used to balance the ITE distribution of users between the control and treatment groups so that the model would still achieve an accurate uplift ranking on data collected from a non-random intervention marketing scenario. We conduct extensive experiments on two public datasets and one product dataset to verify the effectiveness of our EFIN. In addition, our EFIN has been deployed in a credit card bill payment scenario of a large online financial platform with a significant improvement. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.00315v1-abstract-full').style.display = 'none'; document.getElementById('2306.00315v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. 
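<p class="is-size-7">For readers unfamiliar with the ITE quantity the abstract above optimizes, the toy snippet below estimates uplift with a plain two-model (treated vs. control) baseline on synthetic data. It is only meant to ground the definition and is unrelated to EFIN's four modules; all variable names and the data-generating process are assumptions.</p> <pre><code class="language-python">
# Minimal two-model illustration of ITE / uplift (not EFIN; just the quantity being estimated).
import numpy as np

rng = np.random.default_rng(2)
n = 2000
x = rng.normal(size=(n, 3))                    # user / contextual features
t = rng.integers(0, 2, size=n)                 # 1 if, say, a coupon was sent
y = x[:, 0] + 0.8 * t * (x[:, 1] > 0) + 0.1 * rng.normal(size=n)  # observed response

def fit_linear(features, target):
    """Ordinary least squares via numpy's least-squares solver."""
    a = np.c_[features, np.ones(len(features))]
    w, *_ = np.linalg.lstsq(a, target, rcond=None)
    return lambda f: np.c_[f, np.ones(len(f))].dot(w)

model_treat = fit_linear(x[t == 1], y[t == 1])  # response model for treated users
model_ctrl = fit_linear(x[t == 0], y[t == 0])   # response model for control users
ite = model_treat(x) - model_ctrl(x)            # estimated uplift per user
print("mean estimated uplift:", ite.mean())
</code></pre>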
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by SIGKDD 2023 Applied Data Science Track</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.13862">arXiv:2303.13862</a> <span> [<a href="https://arxiv.org/pdf/2303.13862">pdf</a>, <a href="https://arxiv.org/format/2303.13862">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Two-level Graph Network for Few-Shot Class-Incremental Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+H">Hao Chen</a>, <a href="/search/cs?searchtype=author&query=Li%2C+L">Linyan Li</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+F">Fan Lyu</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+F">Fuyuan Hu</a>, <a href="/search/cs?searchtype=author&query=Xia%2C+Z">Zhenping Xia</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+F">Fenglei Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.13862v1-abstract-short" style="display: inline;"> Few-shot class-incremental learning (FSCIL) aims to design machine learning algorithms that can continually learn new concepts from a few data points, without forgetting knowledge of old classes. The difficulty lies in that limited data from new classes not only lead to significant overfitting issues but also exacerbates the notorious catastrophic forgetting problems. However, existing FSCIL metho… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.13862v1-abstract-full').style.display = 'inline'; document.getElementById('2303.13862v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.13862v1-abstract-full" style="display: none;"> Few-shot class-incremental learning (FSCIL) aims to design machine learning algorithms that can continually learn new concepts from a few data points, without forgetting knowledge of old classes. The difficulty lies in that limited data from new classes not only lead to significant overfitting issues but also exacerbates the notorious catastrophic forgetting problems. However, existing FSCIL methods ignore the semantic relationships between sample-level and class-level. % Using the advantage that graph neural network (GNN) can mine rich information among few samples, In this paper, we designed a two-level graph network for FSCIL named Sample-level and Class-level Graph Neural Network (SCGN). Specifically, a pseudo incremental learning paradigm is designed in SCGN, which synthesizes virtual few-shot tasks as new tasks to optimize SCGN model parameters in advance. Sample-level graph network uses the relationship of a few samples to aggregate similar samples and obtains refined class-level features. Class-level graph network aims to mitigate the semantic conflict between prototype features of new classes and old classes. SCGN builds two-level graph networks to guarantee the latent semantic of each few-shot class can be effectively represented in FSCIL. 
Experiments on three popular benchmark datasets show that our method significantly outperforms the baselines and sets new state-of-the-art results with remarkable advantages. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.13862v1-abstract-full').style.display = 'none'; document.getElementById('2303.13862v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: text overlap with arXiv:2203.06953 by other authors</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.02954">arXiv:2303.02954</a> <span> [<a href="https://arxiv.org/pdf/2303.02954">pdf</a>, <a href="https://arxiv.org/format/2303.02954">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Centroid Distance Distillation for Effective Rehearsal in Continual Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+D">Daofeng Liu</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+F">Fan Lyu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+L">Linyan Li</a>, <a href="/search/cs?searchtype=author&query=Xia%2C+Z">Zhenping Xia</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+F">Fuyuan Hu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.02954v1-abstract-short" style="display: inline;"> Rehearsal, retraining on a stored small data subset of old tasks, has been proven effective in solving catastrophic forgetting in continual learning. However, due to the sampled data may have a large bias towards the original dataset, retraining them is susceptible to driving continual domain drift of old tasks in feature space, resulting in forgetting. In this paper, we focus on tackling the cont… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.02954v1-abstract-full').style.display = 'inline'; document.getElementById('2303.02954v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.02954v1-abstract-full" style="display: none;"> Rehearsal, retraining on a stored small data subset of old tasks, has been proven effective in solving catastrophic forgetting in continual learning. However, due to the sampled data may have a large bias towards the original dataset, retraining them is susceptible to driving continual domain drift of old tasks in feature space, resulting in forgetting. In this paper, we focus on tackling the continual domain drift problem with centroid distance distillation. First, we propose a centroid caching mechanism for sampling data points based on constructed centroids to reduce the sample bias in rehearsal. 
Then, we present a centroid distance distillation that only stores the centroid distance to reduce the continual domain drift. The experiments on four continual learning datasets show the superiority of the proposed method, and the continual domain drift can be reduced. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.02954v1-abstract-full').style.display = 'none'; document.getElementById('2303.02954v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.02241">arXiv:2302.02241</a> <span> [<a href="https://arxiv.org/pdf/2302.02241">pdf</a>, <a href="https://arxiv.org/format/2302.02241">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Feature Representation Learning for Click-through Rate Prediction: A Review and New Perspectives </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lyu%2C+F">Fuyuan Lyu</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+X">Xing Tang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+D">Dugang Liu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+H">Haolun Wu</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+C">Chen Ma</a>, <a href="/search/cs?searchtype=author&query=He%2C+X">Xiuqiang He</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xue Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2302.02241v1-abstract-short" style="display: inline;"> Representation learning has been a critical topic in machine learning. In Click-through Rate Prediction, most features are represented as embedding vectors and learned simultaneously with other parameters in the model. With the development of CTR models, feature representation learning has become a trending topic and has been extensively studied by both industrial and academic researchers in recen… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.02241v1-abstract-full').style.display = 'inline'; document.getElementById('2302.02241v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.02241v1-abstract-full" style="display: none;"> Representation learning has been a critical topic in machine learning. In Click-through Rate Prediction, most features are represented as embedding vectors and learned simultaneously with other parameters in the model. With the development of CTR models, feature representation learning has become a trending topic and has been extensively studied by both industrial and academic researchers in recent years. This survey aims at summarizing the feature representation learning in a broader picture and pave the way for future research. 
To achieve such a goal, we first present a taxonomy of current research methods on feature representation learning following two main issues: (i) which feature to represent and (ii) how to represent these features. Then we give a detailed description of each method regarding these two issues. Finally, the review concludes with a discussion on the future directions of this field. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.02241v1-abstract-full').style.display = 'none'; document.getElementById('2302.02241v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to IJCAI 2023 Survey Track</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2301.10909">arXiv:2301.10909</a> <span> [<a href="https://arxiv.org/pdf/2301.10909">pdf</a>, <a href="https://arxiv.org/format/2301.10909">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Optimizing Feature Set for Click-Through Rate Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lyu%2C+F">Fuyuan Lyu</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+X">Xing Tang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+D">Dugang Liu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+L">Liang Chen</a>, <a href="/search/cs?searchtype=author&query=He%2C+X">Xiuqiang He</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xue Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2301.10909v2-abstract-short" style="display: inline;"> Click-through prediction (CTR) models transform features into latent vectors and enumerate possible feature interactions to improve performance based on the input feature set. Therefore, when selecting an optimal feature set, we should consider the influence of both feature and its interaction. However, most previous works focus on either feature field selection or only select feature interaction… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.10909v2-abstract-full').style.display = 'inline'; document.getElementById('2301.10909v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2301.10909v2-abstract-full" style="display: none;"> Click-through prediction (CTR) models transform features into latent vectors and enumerate possible feature interactions to improve performance based on the input feature set. Therefore, when selecting an optimal feature set, we should consider the influence of both feature and its interaction. However, most previous works focus on either feature field selection or only select feature interaction based on the fixed feature set to produce the feature set. 
The former restricts search space to the feature field, which is too coarse to determine subtle features. They also do not filter useless feature interactions, leading to higher computation costs and degraded model performance. The latter identifies useful feature interaction from all available features, resulting in many redundant features in the feature set. In this paper, we propose a novel method named OptFS to address these problems. To unify the selection of feature and its interaction, we decompose the selection of each feature interaction into the selection of two correlated features. Such a decomposition makes the model end-to-end trainable given various feature interaction operations. By adopting feature-level search space, we set a learnable gate to determine whether each feature should be within the feature set. Because of the large-scale search space, we develop a learning-by-continuation training scheme to learn such gates. Hence, OptFS generates the feature set only containing features which improve the final prediction results. Experimentally, we evaluate OptFS on three public datasets, demonstrating OptFS can optimize feature sets which enhance the model performance and further reduce both the storage and computational cost. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2301.10909v2-abstract-full').style.display = 'none'; document.getElementById('2301.10909v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by WWW 2023 Research Tracks</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2212.14464">arXiv:2212.14464</a> <span> [<a href="https://arxiv.org/pdf/2212.14464">pdf</a>, <a href="https://arxiv.org/format/2212.14464">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Result Diversification in Search and Recommendation: A Survey </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wu%2C+H">Haolun Wu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yansen Zhang</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+C">Chen Ma</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+F">Fuyuan Lyu</a>, <a href="/search/cs?searchtype=author&query=He%2C+B">Bowei He</a>, <a href="/search/cs?searchtype=author&query=Mitra%2C+B">Bhaskar Mitra</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xue Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2212.14464v4-abstract-short" style="display: inline;"> Diversifying return results is an important research topic in retrieval systems in order to satisfy both the various interests of customers and the equal market exposure of providers. 
There has been growing attention on diversity-aware research during recent years, accompanied by a proliferation of literature on methods to promote diversity in search and recommendation. However, diversity-aware st… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.14464v4-abstract-full').style.display = 'inline'; document.getElementById('2212.14464v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2212.14464v4-abstract-full" style="display: none;"> Diversifying return results is an important research topic in retrieval systems in order to satisfy both the various interests of customers and the equal market exposure of providers. There has been growing attention on diversity-aware research during recent years, accompanied by a proliferation of literature on methods to promote diversity in search and recommendation. However, diversity-aware studies in retrieval systems lack a systematic organization and are rather fragmented. In this survey, we are the first to propose a unified taxonomy for classifying the metrics and approaches of diversification in both search and recommendation, which are two of the most extensively researched fields of retrieval systems. We begin the survey with a brief discussion of why diversity is important in retrieval systems, followed by a summary of the various diversity concerns in search and recommendation, highlighting their relationship and differences. For the survey's main body, we present a unified taxonomy of diversification metrics and approaches in retrieval systems, from both the search and recommendation perspectives. In the later part of the survey, we discuss the open research questions of diversity-aware research in search and recommendation in an effort to inspire future innovations and encourage the implementation of diversity in real-world systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.14464v4-abstract-full').style.display = 'none'; document.getElementById('2212.14464v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">20 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2211.14763">arXiv:2211.14763</a> <span> [<a href="https://arxiv.org/pdf/2211.14763">pdf</a>, <a href="https://arxiv.org/format/2211.14763">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Multi-Label Continual Learning using Augmented Graph Convolutional Network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Du%2C+K">Kaile Du</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+F">Fan Lyu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+L">Linyan Li</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+F">Fuyuan Hu</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+W">Wei Feng</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+F">Fenglei Xu</a>, <a href="/search/cs?searchtype=author&query=Xi%2C+X">Xuefeng Xi</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+H">Hanjing Cheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2211.14763v1-abstract-short" style="display: inline;"> Multi-Label Continual Learning (MLCL) builds a class-incremental framework in a sequential multi-label image recognition data stream. The critical challenges of MLCL are the construction of label relationships on past-missing and future-missing partial labels of training data and the catastrophic forgetting on old classes, resulting in poor generalization. To solve the problems, the study proposes… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.14763v1-abstract-full').style.display = 'inline'; document.getElementById('2211.14763v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2211.14763v1-abstract-full" style="display: none;"> Multi-Label Continual Learning (MLCL) builds a class-incremental framework in a sequential multi-label image recognition data stream. The critical challenges of MLCL are the construction of label relationships on past-missing and future-missing partial labels of training data and the catastrophic forgetting on old classes, resulting in poor generalization. To solve the problems, the study proposes an Augmented Graph Convolutional Network (AGCN++) that can construct the cross-task label relationships in MLCL and sustain catastrophic forgetting. First, we build an Augmented Correlation Matrix (ACM) across all seen classes, where the intra-task relationships derive from the hard label statistics. In contrast, the inter-task relationships leverage hard and soft labels from data and a constructed expert network. Then, we propose a novel partial label encoder (PLE) for MLCL, which can extract dynamic class representation for each partial label image as graph nodes and help generate soft labels to create a more convincing ACM and suppress forgetting. 
Last, to suppress the forgetting of label dependencies across old tasks, we propose a relationship-preserving constraint to construct label relationships. The inter-class topology can be augmented automatically, which also yields effective class representations. The proposed method is evaluated using two multi-label image benchmarks. The experimental results show that the proposed method is effective for MLCL image recognition and can build convincing correlations across tasks even if the labels of previous tasks are missing. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2211.14763v1-abstract-full').style.display = 'none'; document.getElementById('2211.14763v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 November, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2210.10581">arXiv:2210.10581</a> <span> [<a href="https://arxiv.org/pdf/2210.10581">pdf</a>, <a href="https://arxiv.org/format/2210.10581">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> CEntRE: A paragraph-level Chinese dataset for Relation Extraction among Enterprises </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+P">Peipei Liu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+H">Hong Li</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zhiyu Wang</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+Y">Yimo Ren</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jie Liu</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+F">Fei Lyu</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+H">Hongsong Zhu</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+L">Limin Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2210.10581v1-abstract-short" style="display: inline;"> Enterprise relation extraction aims to detect pairs of enterprise entities and identify the business relations between them from unstructured or semi-structured text data, and it is crucial for several real-world applications such as risk analysis, rating research and supply chain security. 
However, previous work mainly focuses on getting attribute information about enterprises like personnel and corporate business, and pays little attention to enterprise relation extraction. To encourage further progress in the research, we introduce the CEntRE, a new dataset constructed from publicly available business news data with careful human annotation and intelligent data processing. Extensive experiments on CEntRE with six excellent models demonstrate the challenges of our proposed dataset. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2210.10581v1-abstract-full').style.display = 'none'; document.getElementById('2210.10581v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 October, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2209.12241">arXiv:2209.12241</a> <span> [<a href="https://arxiv.org/pdf/2209.12241">pdf</a>, <a href="https://arxiv.org/format/2209.12241">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Exploring Example Influence in Continual Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sun%2C+Q">Qing Sun</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+F">Fan Lyu</a>, <a href="/search/cs?searchtype=author&query=Shang%2C+F">Fanhua Shang</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+W">Wei Feng</a>, <a href="/search/cs?searchtype=author&query=Wan%2C+L">Liang Wan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2209.12241v1-abstract-short" style="display: inline;"> Continual Learning (CL) sequentially learns new tasks like human beings, with the goal to achieve better Stability (S, remembering past tasks) and Plasticity (P, adapting to new tasks). Due to the fact that past training data is not available, it is valuable to explore the influence difference on S and P among training examples, which may improve the learning pattern towards better SP. Inspired by… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.12241v1-abstract-full').style.display = 'inline'; document.getElementById('2209.12241v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2209.12241v1-abstract-full" style="display: none;"> Continual Learning (CL) sequentially learns new tasks like human beings, with the goal to achieve better Stability (S, remembering past tasks) and Plasticity (P, adapting to new tasks). Due to the fact that past training data is not available, it is valuable to explore the influence difference on S and P among training examples, which may improve the learning pattern towards better SP. Inspired by Influence Function (IF), we first study example influence via adding perturbation to example weight and computing the influence derivation. 
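<p class="is-size-7">The perturbation idea in the preceding sentence can be approximated without any Hessian: up-weight a single training example, take one gradient step, and read off the change in validation loss. The sketch below is a first-order illustration on assumed toy data, not the MetaSP algorithm described next.</p> <pre><code class="language-python">
# First-order sketch of example influence via weight perturbation (not the MetaSP algorithm).
import numpy as np

rng = np.random.default_rng(3)
x_tr, y_tr = rng.normal(size=(50, 2)), rng.integers(0, 2, size=50)
x_va, y_va = rng.normal(size=(20, 2)), rng.integers(0, 2, size=20)
w = np.zeros(2)

def loss_and_grad(w, x, y, sample_w):
    """Weighted logistic-regression loss and its gradient."""
    p = 1.0 / (1.0 + np.exp(-x.dot(w)))
    loss = -np.mean(sample_w * (y * np.log(p + 1e-9) + (1 - y) * np.log(1 - p + 1e-9)))
    grad = x.T.dot(sample_w * (p - y)) / len(y)
    return loss, grad

def influence(i, eps=0.01, lr=0.1):
    """Up-weight example i by eps, take one gradient step, report the validation-loss change."""
    base = np.ones(len(y_tr))
    pert = base.copy()
    pert[i] += eps
    _, g_base = loss_and_grad(w, x_tr, y_tr, base)
    _, g_pert = loss_and_grad(w, x_tr, y_tr, pert)
    val_base, _ = loss_and_grad(w - lr * g_base, x_va, y_va, np.ones(len(y_va)))
    val_pert, _ = loss_and_grad(w - lr * g_pert, x_va, y_va, np.ones(len(y_va)))
    return (val_pert - val_base) / eps  # positive means up-weighting this example hurts validation

print([round(influence(i), 4) for i in range(3)])
</code></pre>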
To avoid the storage and calculation burden of Hessian inverse in neural networks, we propose a simple yet effective MetaSP algorithm to simulate the two key steps in the computation of IF and obtain the S- and P-aware example influence. Moreover, we propose to fuse two kinds of example influence by solving a dual-objective optimization problem, and obtain a fused influence towards SP Pareto optimality. The fused influence can be used to control the update of model and optimize the storage of rehearsal. Empirical results show that our algorithm significantly outperforms state-of-the-art methods on both task- and class-incremental benchmark CL datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.12241v1-abstract-full').style.display = 'none'; document.getElementById('2209.12241v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at NeurIPS 2022</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2208.04482">arXiv:2208.04482</a> <span> [<a href="https://arxiv.org/pdf/2208.04482">pdf</a>, <a href="https://arxiv.org/format/2208.04482">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3511808.3557411">10.1145/3511808.3557411 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> OptEmbed: Learning Optimal Embedding Table for Click-through Rate Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lyu%2C+F">Fuyuan Lyu</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+X">Xing Tang</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+H">Hong Zhu</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+H">Huifeng Guo</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yingxue Zhang</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+R">Ruiming Tang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xue Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2208.04482v2-abstract-short" style="display: inline;"> Learning embedding table plays a fundamental role in Click-through rate(CTR) prediction from the view of the model performance and memory usage. The embedding table is a two-dimensional tensor, with its axes indicating the number of feature values and the embedding dimension, respectively. 
To learn an efficient and effective embedding table, recent works either assign various embedding dimensions… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.04482v2-abstract-full').style.display = 'inline'; document.getElementById('2208.04482v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2208.04482v2-abstract-full" style="display: none;"> Learning embedding table plays a fundamental role in Click-through rate(CTR) prediction from the view of the model performance and memory usage. The embedding table is a two-dimensional tensor, with its axes indicating the number of feature values and the embedding dimension, respectively. To learn an efficient and effective embedding table, recent works either assign various embedding dimensions for feature fields and reduce the number of embeddings respectively or mask the embedding table parameters. However, all these existing works cannot get an optimal embedding table. On the one hand, various embedding dimensions still require a large amount of memory due to the vast number of features in the dataset. On the other hand, decreasing the number of embeddings usually suffers from performance degradation, which is intolerable in CTR prediction. Finally, pruning embedding parameters will lead to a sparse embedding table, which is hard to be deployed. To this end, we propose an optimal embedding table learning framework OptEmbed, which provides a practical and general method to find an optimal embedding table for various base CTR models. Specifically, we propose pruning the redundant embeddings regarding corresponding features' importance by learnable pruning thresholds. Furthermore, we consider assigning various embedding dimensions as one single candidate architecture. To efficiently search the optimal embedding dimensions, we design a uniform embedding dimension sampling scheme to equally train all candidate architectures, meaning architecture-related parameters and learnable thresholds are trained simultaneously in one supernet. We then propose an evolution search method based on the supernet to find the optimal embedding dimensions for each field. Experiments on public datasets show that OptEmbed can learn a compact embedding table which can further improve the model performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.04482v2-abstract-full').style.display = 'none'; document.getElementById('2208.04482v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 September, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2022. 
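<p class="is-size-7">A much-simplified view of the pruning half of the abstract above: embedding rows whose importance falls below a per-field threshold are masked out, shrinking the table. In OptEmbed the thresholds are learnable and the embedding dimensions are additionally searched; the toy code below assumes fixed thresholds and an L2-norm importance proxy.</p> <pre><code class="language-python">
# Toy masking of redundant embedding rows by per-field thresholds (a simplification of the
# learnable-threshold pruning in the abstract; dimension search is omitted).
import numpy as np

rng = np.random.default_rng(4)
num_values, dim = 1000, 16
table = rng.normal(scale=0.05, size=(num_values, dim))
field_of_value = rng.integers(0, 5, size=num_values)  # which field each value id belongs to
thresholds = np.full(5, 0.18)                         # fixed here; learnable in OptEmbed

importance = np.linalg.norm(table, axis=1)            # L2 norm as a crude importance proxy
keep = importance > thresholds[field_of_value]        # per-value pruning decision
pruned = table * keep[:, None]

print("kept rows:", int(keep.sum()), "of", num_values)
print("zeroed parameters:", int((~keep).sum()) * dim)
</code></pre>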
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by CIKM 2022 Research Track</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2207.07840">arXiv:2207.07840</a> <span> [<a href="https://arxiv.org/pdf/2207.07840">pdf</a>, <a href="https://arxiv.org/format/2207.07840">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Class-Incremental Lifelong Learning in Multi-Label Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Du%2C+K">Kaile Du</a>, <a href="/search/cs?searchtype=author&query=Li%2C+L">Linyan Li</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+F">Fan Lyu</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+F">Fuyuan Hu</a>, <a href="/search/cs?searchtype=author&query=Xia%2C+Z">Zhenping Xia</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+F">Fenglei Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2207.07840v1-abstract-short" style="display: inline;"> Existing class-incremental lifelong learning studies only the data is with single-label, which limits its adaptation to multi-label data. This paper studies Lifelong Multi-Label (LML) classification, which builds an online class-incremental classifier in a sequential multi-label classification data stream. Training on the data with Partial Labels in LML classification may result in more serious Ca… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.07840v1-abstract-full').style.display = 'inline'; document.getElementById('2207.07840v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2207.07840v1-abstract-full" style="display: none;"> Existing class-incremental lifelong learning studies only the data is with single-label, which limits its adaptation to multi-label data. This paper studies Lifelong Multi-Label (LML) classification, which builds an online class-incremental classifier in a sequential multi-label classification data stream. Training on the data with Partial Labels in LML classification may result in more serious Catastrophic Forgetting in old classes. To solve the problem, the study proposes an Augmented Graph Convolutional Network (AGCN) with a built Augmented Correlation Matrix (ACM) across sequential partial-label tasks. The results of two benchmarks show that the method is effective for LML classification and reducing forgetting. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.07840v1-abstract-full').style.display = 'none'; document.getElementById('2207.07840v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: substantial text overlap with arXiv:2203.05534</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2203.10480">arXiv:2203.10480</a> <span> [<a href="https://arxiv.org/pdf/2203.10480">pdf</a>, <a href="https://arxiv.org/format/2203.10480">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Encoder-Decoder Architecture for Supervised Dynamic Graph Learning: A Survey </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhu%2C+Y">Yuecai Zhu</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+F">Fuyuan Lyu</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+C">Chengming Hu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xi Chen</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xue Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2203.10480v2-abstract-short" style="display: inline;"> In recent years, the prevalent online services generate a sheer volume of user activity data. Service providers collect these data in order to perform client behavior analysis, and offer better and more customized services. Majority of these data can be modeled and stored as graph, such as the social graph in Facebook, user-video interaction graph in Youtube. These graphs need to evolve over time… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.10480v2-abstract-full').style.display = 'inline'; document.getElementById('2203.10480v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2203.10480v2-abstract-full" style="display: none;"> In recent years, the prevalent online services generate a sheer volume of user activity data. Service providers collect these data in order to perform client behavior analysis, and offer better and more customized services. Majority of these data can be modeled and stored as graph, such as the social graph in Facebook, user-video interaction graph in Youtube. These graphs need to evolve over time to capture the dynamics in the real world, leading to the invention of dynamic graphs. However, the temporal information embedded in the dynamic graphs brings new challenges in analyzing and deploying them. Events staleness, temporal information learning and explicit time dimension usage are some example challenges in dynamic graph learning. In order to offer a convenient reference to both the industry and academia, this survey presents the Three Stages Recurrent Temporal Learning Framework based on dynamic graph evolution theories, so as to interpret the learning of temporal information with a generalized framework. Under this framework, this survey categories and reviews different learnable encoder-decoder architectures for supervised dynamic graph learning. We believe that this survey could supply useful guidelines to researchers and engineers in finding suitable graph structures for their dynamic learning tasks. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.10480v2-abstract-full').style.display = 'none'; document.getElementById('2203.10480v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Optimize title for better visibility</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2203.05534">arXiv:2203.05534</a> <span> [<a href="https://arxiv.org/pdf/2203.05534">pdf</a>, <a href="https://arxiv.org/format/2203.05534">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> AGCN: Augmented Graph Convolutional Network for Lifelong Multi-label Image Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Du%2C+K">Kaile Du</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+F">Fan Lyu</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+F">Fuyuan Hu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+L">Linyan Li</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+W">Wei Feng</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+F">Fenglei Xu</a>, <a href="/search/cs?searchtype=author&query=Fu%2C+Q">Qiming Fu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2203.05534v2-abstract-short" style="display: inline;"> The Lifelong Multi-Label (LML) image recognition builds an online class-incremental classifier in a sequential multi-label image recognition data stream. The key challenges of LML image recognition are the construction of label relationships on Partial Labels of training data and the Catastrophic Forgetting on old classes, resulting in poor generalization. To solve the problems, the study proposes… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.05534v2-abstract-full').style.display = 'inline'; document.getElementById('2203.05534v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2203.05534v2-abstract-full" style="display: none;"> The Lifelong Multi-Label (LML) image recognition builds an online class-incremental classifier in a sequential multi-label image recognition data stream. The key challenges of LML image recognition are the construction of label relationships on Partial Labels of training data and the Catastrophic Forgetting on old classes, resulting in poor generalization. To solve the problems, the study proposes an Augmented Graph Convolutional Network (AGCN) model that can construct the label relationships across the sequential recognition tasks and sustain the catastrophic forgetting. 
First, we build an Augmented Correlation Matrix (ACM) across all seen classes, where the intra-task relationships derive from hard label statistics while the inter-task relationships leverage both hard and soft labels from the data and a constructed expert network. Then, based on the ACM, the proposed AGCN captures label dependencies with a dynamically augmented structure and yields effective class representations. Last, to suppress the forgetting of label dependencies across old tasks, we propose a relationship-preserving loss as a constraint on the construction of label relationships. The proposed method is evaluated on two multi-label image benchmarks, and the experimental results show that it is effective for LML image recognition and can build convincing correlations across tasks even if the labels of previous tasks are missing. Our code is available at https://github.com/Kaile-Du/AGCN.
Submitted 10 March, 2022; v1 submitted 10 March, 2022; originally announced March 2022.
Comments: Accepted in ICME 2022
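
The ACM described above is built from label statistics: intra-task entries come from hard label co-occurrence, inter-task entries from soft predictions of an expert network. As a hedged illustration of the hard-label part only, the sketch below estimates a conditional co-occurrence matrix from a multi-hot label matrix; the function name and normalization are assumptions for this sketch, not the AGCN code.

```python
import numpy as np

def label_cooccurrence(labels: np.ndarray, eps: float = 1e-8) -> np.ndarray:
    """labels: (num_samples, num_classes) multi-hot matrix of hard labels.
    Returns A where A[i, j] approximates P(class j present | class i present),
    a common way to turn label statistics into a label-correlation graph."""
    counts = labels.T @ labels                   # (C, C) raw co-occurrence counts
    per_class = np.diag(counts).reshape(-1, 1)   # how often each class i appears
    return counts / (per_class + eps)

labels = np.array([[1, 1, 0],
                   [1, 0, 1],
                   [0, 1, 1]], dtype=float)
print(np.round(label_cooccurrence(labels), 2))   # rows condition, columns co-occur
```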

arXiv:2110.11159 (https://arxiv.org/abs/2110.11159) [pdf, other] cs.CV (Computer Vision and Pattern Recognition); cs.LG (Machine Learning); cs.MM (Multimedia)
Each Attribute Matters: Contrastive Attention for Sentence-based Image Editing
Authors: Liuqing Zhao, Fan Lyu, Fuyuan Hu, Kaizhu Huang, Fenglei Xu, Linyan Li
Abstract: Sentence-based Image Editing (SIE) aims to deploy natural language to edit an image. Offering the potential to reduce expensive manual editing, SIE has attracted much interest recently. However, existing methods can hardly produce accurate edits and may even fail at attribute editing when the query sentence contains multiple editable attributes. To cope with this problem, by focusing on enhancing the difference between attributes, this paper proposes a novel model called Contrastive Attention Generative Adversarial Network (CA-GAN), which is inspired by contrastive training. Specifically, we first design a novel contrastive attention module to enlarge the editing difference between random combinations of attributes formed during training. We then construct an attribute discriminator to ensure effective editing on each attribute. A series of experiments show that our method can generate very encouraging results in sentence-based image editing with multiple attributes on the CUB and COCO datasets. Our code is available at https://github.com/Zlq2021/CA-GAN.
Submitted 21 October, 2021; originally announced October 2021.
Comments: Accepted by BMVC 2021
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by BMVC 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2108.01265">arXiv:2108.01265</a> <span> [<a href="https://arxiv.org/pdf/2108.01265">pdf</a>, <a href="https://arxiv.org/format/2108.01265">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Memorize, Factorize, or be Na茂ve: Learning Optimal Feature Interaction Methods for CTR Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lyu%2C+F">Fuyuan Lyu</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+X">Xing Tang</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+H">Huifeng Guo</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+R">Ruiming Tang</a>, <a href="/search/cs?searchtype=author&query=He%2C+X">Xiuqiang He</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Rui Zhang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xue Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2108.01265v3-abstract-short" style="display: inline;"> Click-through rate prediction is one of the core tasks in commercial recommender systems. It aims to predict the probability of a user clicking a particular item given user and item features. As feature interactions bring in non-linearity, they are widely adopted to improve the performance of CTR prediction models. Therefore, effectively modelling feature interactions has attracted much attention… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2108.01265v3-abstract-full').style.display = 'inline'; document.getElementById('2108.01265v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2108.01265v3-abstract-full" style="display: none;"> Click-through rate prediction is one of the core tasks in commercial recommender systems. It aims to predict the probability of a user clicking a particular item given user and item features. As feature interactions bring in non-linearity, they are widely adopted to improve the performance of CTR prediction models. Therefore, effectively modelling feature interactions has attracted much attention in both the research and industry field. The current approaches can generally be categorized into three classes: (1) na茂ve methods, which do not model feature interactions and only use original features; (2) memorized methods, which memorize feature interactions by explicitly viewing them as new features and assigning trainable embeddings; (3) factorized methods, which learn latent vectors for original features and implicitly model feature interactions through factorization functions. Studies have shown that modelling feature interactions by one of these methods alone are suboptimal due to the unique characteristics of different feature interactions. 
To address this issue, we first propose a general framework called OptInter, which finds the most suitable modelling method for each feature interaction. Different state-of-the-art deep CTR models can be viewed as instances of OptInter. To realize the functionality of OptInter, we also introduce a learning algorithm that automatically searches for the optimal modelling method. We conduct extensive experiments on four large datasets. Our experiments show that OptInter improves the best-performing state-of-the-art baseline deep CTR models by up to 2.21%. Compared to the memorized method, which also outperforms baselines, we reduce up to 91% of parameters. In addition, we conduct several ablation studies to investigate the influence of different components of OptInter. Finally, we provide interpretable discussions on the results of OptInter.
Submitted 24 November, 2021; v1 submitted 2 August, 2021; originally announced August 2021.
Comments: Published in ICDE 2022
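
The three interaction-modelling classes in the abstract above map naturally onto code. Below is a minimal, hedged sketch of the "memorized" versus "factorized" options for a single pair of categorical fields in PyTorch; the hashing trick for the cross feature, the vocabulary sizes, and the function names are illustrative assumptions, not OptInter itself.

```python
import torch
import torch.nn as nn

NUM_A, NUM_B, DIM, CROSS_BUCKETS = 1000, 500, 8, 10_000

emb_a = nn.Embedding(NUM_A, DIM)              # per-field embeddings (used by both options)
emb_b = nn.Embedding(NUM_B, DIM)
emb_cross = nn.Embedding(CROSS_BUCKETS, DIM)  # "memorized": the pair gets its own embedding

def factorized(a_ids, b_ids):
    # implicit interaction via a factorization function (inner product here)
    return (emb_a(a_ids) * emb_b(b_ids)).sum(-1, keepdim=True)

def memorized(a_ids, b_ids):
    # explicit interaction: treat (a, b) as a new feature, hashed into a fixed table
    cross_ids = (a_ids * NUM_B + b_ids) % CROSS_BUCKETS
    return emb_cross(cross_ids).sum(-1, keepdim=True)

a, b = torch.tensor([3, 7]), torch.tensor([10, 42])
print(factorized(a, b).shape, memorized(a, b).shape)   # both (2, 1) interaction terms
```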

arXiv:2106.08605 (https://arxiv.org/abs/2106.08605) [pdf, other] cs.CV (Computer Vision and Pattern Recognition)
DOI: 10.1109/TMM.2021.3089017 (https://doi.org/10.1109/TMM.2021.3089017)
Disentangling Semantic-to-visual Confusion for Zero-shot Learning
Authors: Zihan Ye, Fuyuan Hu, Fan Lyu, Linyan Li, Kaizhu Huang
Abstract: Using generative models to synthesize visual features from semantic distributions has become one of the most popular solutions to zero-shot learning (ZSL) image classification in recent years. The triplet loss (TL) is popularly used to generate realistic visual distributions from semantics by automatically searching for discriminative representations. However, the traditional TL cannot search for reliable unseen disentangled representations due to the unavailability of unseen classes in ZSL. To alleviate this drawback, we propose in this work a multi-modal triplet loss (MMTL) which utilizes multimodal information to search a disentangled representation space. As such, all classes can interplay, which benefits learning disentangled class representations in the searched space. Furthermore, we develop a novel model called Disentangling Class Representation Generative Adversarial Network (DCR-GAN), which focuses on exploiting the disentangled representations in the training, feature synthesis, and final recognition stages. Benefiting from the disentangled representations, DCR-GAN can fit a more realistic distribution over both seen and unseen features. Extensive experiments show that our proposed model achieves superior performance over the state of the art on four benchmark datasets. Our code is available at https://github.com/FouriYe/DCRGAN-TMM.
Submitted 16 June, 2021; originally announced June 2021.
Comments: Accepted by IEEE Transactions on Multimedia (TMM) in 2021
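
The abstract above starts from the standard triplet loss before extending it to a multi-modal variant. As a hedged reference point only, here is the plain triplet loss the paper builds on, written directly in PyTorch; it is not the proposed MMTL.

```python
import torch
import torch.nn.functional as F

def triplet_loss(anchor, positive, negative, margin: float = 1.0):
    """Classic triplet loss: pull anchor toward the positive, push it away from the negative."""
    d_pos = F.pairwise_distance(anchor, positive)
    d_neg = F.pairwise_distance(anchor, negative)
    return F.relu(d_pos - d_neg + margin).mean()

a, p, n = torch.randn(4, 32), torch.randn(4, 32), torch.randn(4, 32)
print(triplet_loss(a, p, n))   # essentially what torch.nn.TripletMarginLoss(margin=1.0) computes
```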
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by IEEE TRANSACTIONS ON MULTIMEDIA (TMM) in 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2102.06528">arXiv:2102.06528</a> <span> [<a href="https://arxiv.org/pdf/2102.06528">pdf</a>, <a href="https://arxiv.org/format/2102.06528">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> </div> </div> <p class="title is-5 mathjax"> A Tale of Two Countries: A Longitudinal Cross-Country Study of Mobile Users' Reactions to the COVID-19 Pandemic Through the Lens of App Popularity </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+L">Liu Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Haoyu Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yi Wang</a>, <a href="/search/cs?searchtype=author&query=Tyson%2C+G">Gareth Tyson</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+F">Fei Lyu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2102.06528v2-abstract-short" style="display: inline;"> The ongoing COVID-19 pandemic has profoundly impacted people's life around the world, including how they interact with mobile technologies. In this paper, we seek to develop an understanding of how the dynamic trajectory of a pandemic shapes mobile phone users' experiences. Through the lens of app popularity, we approach this goal from a cross-country perspective. We compile a dataset consisting o… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.06528v2-abstract-full').style.display = 'inline'; document.getElementById('2102.06528v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2102.06528v2-abstract-full" style="display: none;"> The ongoing COVID-19 pandemic has profoundly impacted people's life around the world, including how they interact with mobile technologies. In this paper, we seek to develop an understanding of how the dynamic trajectory of a pandemic shapes mobile phone users' experiences. Through the lens of app popularity, we approach this goal from a cross-country perspective. We compile a dataset consisting of six-month daily snapshots of the most popular apps in the iOS App Store in China and the US, where the pandemic has exhibited distinct trajectories. Using this longitudinal dataset, our analysis provides detailed patterns of app ranking during the pandemic at both category and individual app levels. We reveal that app categories' rankings are correlated with the pandemic, contingent upon country-specific development trajectories. Our work offers rich insights into how the COVID-19, a typical global public health crisis, has influence people's day-to-day interaction with the Internet and mobile technologies. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.06528v2-abstract-full').style.display = 'none'; document.getElementById('2102.06528v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 March, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 February, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2012.13662">arXiv:2012.13662</a> <span> [<a href="https://arxiv.org/pdf/2012.13662">pdf</a>, <a href="https://arxiv.org/format/2012.13662">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Coarse to Fine: Multi-label Image Classification with Global/Local Attention </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lyu%2C+F">Fan Lyu</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+F">Fuyuan Hu</a>, <a href="/search/cs?searchtype=author&query=Sheng%2C+V+S">Victor S. Sheng</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z">Zhengtian Wu</a>, <a href="/search/cs?searchtype=author&query=Fu%2C+Q">Qiming Fu</a>, <a href="/search/cs?searchtype=author&query=Fu%2C+B">Baochuan Fu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2012.13662v1-abstract-short" style="display: inline;"> In our daily life, the scenes around us are always with multiple labels especially in a smart city, i.e., recognizing the information of city operation to response and control. Great efforts have been made by using Deep Neural Networks to recognize multi-label images. Since multi-label image classification is very complicated, people seek to use the attention mechanism to guide the classification… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2012.13662v1-abstract-full').style.display = 'inline'; document.getElementById('2012.13662v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2012.13662v1-abstract-full" style="display: none;"> In our daily life, the scenes around us are always with multiple labels especially in a smart city, i.e., recognizing the information of city operation to response and control. Great efforts have been made by using Deep Neural Networks to recognize multi-label images. Since multi-label image classification is very complicated, people seek to use the attention mechanism to guide the classification process. However, conventional attention-based methods always analyzed images directly and aggressively. It is difficult for them to well understand complicated scenes. In this paper, we propose a global/local attention method that can recognize an image from coarse to fine by mimicking how human-beings observe images. Specifically, our global/local attention method first concentrates on the whole image, and then focuses on local specific objects in the image. 
We also propose a joint max-margin objective function, which enforces that the minimum score of positive labels should be larger than the maximum score of negative labels, horizontally and vertically. This function can further improve our multi-label image classification method. We evaluate the effectiveness of our method on two popular multi-label image datasets (i.e., Pascal VOC and MS-COCO). Our experimental results show that our method outperforms state-of-the-art methods.
Submitted 25 December, 2020; originally announced December 2020.
Comments: Accepted by IEEE International Smart Cities Conference 2018
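
The joint max-margin objective above constrains the lowest-scoring positive label to stay above the highest-scoring negative label. Below is a minimal, hedged sketch of that single constraint as a hinge loss in PyTorch; the "horizontal and vertical" application across images and labels in the paper is not reproduced, and the margin value and names are assumptions.

```python
import torch
import torch.nn.functional as F

def min_pos_vs_max_neg_hinge(scores: torch.Tensor, targets: torch.Tensor, margin: float = 1.0):
    """scores, targets: (batch, num_labels); targets is multi-hot.
    Penalize samples where the minimum positive score does not exceed
    the maximum negative score by at least `margin`."""
    pos = scores.masked_fill(targets == 0, float("inf")).min(dim=1).values   # lowest positive score
    neg = scores.masked_fill(targets == 1, float("-inf")).max(dim=1).values  # highest negative score
    return F.relu(neg - pos + margin).mean()

scores = torch.tensor([[2.0, 0.5, -1.0], [0.2, 1.5, 0.1]])
targets = torch.tensor([[1, 0, 0], [0, 1, 1]])
print(min_pos_vs_max_neg_hinge(scores, targets))   # only the second sample violates the margin
```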

arXiv:2012.07236 (https://arxiv.org/abs/2012.07236) [pdf, other] cs.LG (Machine Learning); cs.AI (Artificial Intelligence); cs.CV (Computer Vision and Pattern Recognition)
Multi-Domain Multi-Task Rehearsal for Lifelong Learning
Authors: Fan Lyu, Shuai Wang, Wei Feng, Zihan Ye, Fuyuan Hu, Song Wang
Abstract: Rehearsal, which seeks to remind the model by storing old knowledge in lifelong learning, is one of the most effective ways to mitigate catastrophic forgetting, i.e., the biased forgetting of previous knowledge when moving to new tasks. However, in most previous rehearsal-based methods, the old tasks suffer from unpredictable domain shift when training the new task. This is because these methods ignore two significant factors. First, the Data Imbalance between the new task and the old tasks makes the domain of old tasks prone to shift. Second, the Task Isolation among all tasks makes the domain shift toward unpredictable directions. To address the unpredictable domain shift, in this paper we propose Multi-Domain Multi-Task (MDMT) rehearsal, which trains the old tasks and the new task in parallel and equally to break the isolation among tasks. Specifically, a two-level angular margin loss is proposed to encourage intra-class/task compactness and inter-class/task discrepancy, which keeps the model from domain chaos. In addition, to further address domain shift of the old tasks, we propose an optional episodic distillation loss on the memory to anchor the knowledge for each old task. Experiments on benchmark datasets validate that the proposed approach can effectively mitigate the unpredictable domain shift.
Submitted 13 December, 2020; originally announced December 2020.
Comments: Accepted by AAAI 2021
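
The optional episodic distillation loss mentioned above anchors each old task by matching the current model's outputs on memory samples to outputs recorded when that task was learned. The snippet below is a hedged, generic sketch of such a distillation term (KL divergence between softened logits); the temperature and the idea of caching logits alongside the memory are illustrative assumptions, not necessarily the paper's exact formulation.

```python
import torch
import torch.nn.functional as F

def episodic_distillation(current_logits: torch.Tensor,
                          stored_logits: torch.Tensor,
                          temperature: float = 2.0) -> torch.Tensor:
    """KL divergence between softened stored (teacher) and current (student)
    predictions on rehearsal-memory samples of an old task."""
    student = F.log_softmax(current_logits / temperature, dim=1)
    teacher = F.softmax(stored_logits / temperature, dim=1)
    return F.kl_div(student, teacher, reduction="batchmean") * temperature ** 2

cur = torch.randn(8, 10)   # current model outputs on memory samples
old = torch.randn(8, 10)   # logits cached when the old task was trained
print(episodic_distillation(cur, old))
```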
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by AAAI 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2010.01471">arXiv:2010.01471</a> <span> [<a href="https://arxiv.org/pdf/2010.01471">pdf</a>, <a href="https://arxiv.org/ps/2010.01471">ps</a>, <a href="https://arxiv.org/format/2010.01471">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Deep Reinforcement Learning for Delay-Oriented IoT Task Scheduling in Space-Air-Ground Integrated Network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhou%2C+C">Conghao Zhou</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+W">Wen Wu</a>, <a href="/search/cs?searchtype=author&query=He%2C+H">Hongli He</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+P">Peng Yang</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+F">Feng Lyu</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+N">Nan Cheng</a>, <a href="/search/cs?searchtype=author&query=Xuemin"> Xuemin</a>, <a href="/search/cs?searchtype=author&query=Shen"> Shen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2010.01471v1-abstract-short" style="display: inline;"> In this paper, we investigate a computing task scheduling problem in space-air-ground integrated network (SAGIN) for delay-oriented Internet of Things (IoT) services. In the considered scenario, an unmanned aerial vehicle (UAV) collects computing tasks from IoT devices and then makes online offloading decisions, in which the tasks can be processed at the UAV or offloaded to the nearby base station… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.01471v1-abstract-full').style.display = 'inline'; document.getElementById('2010.01471v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2010.01471v1-abstract-full" style="display: none;"> In this paper, we investigate a computing task scheduling problem in space-air-ground integrated network (SAGIN) for delay-oriented Internet of Things (IoT) services. In the considered scenario, an unmanned aerial vehicle (UAV) collects computing tasks from IoT devices and then makes online offloading decisions, in which the tasks can be processed at the UAV or offloaded to the nearby base station or the remote satellite. Our objective is to design a task scheduling policy that minimizes offloading and computing delay of all tasks given the UAV energy capacity constraint. To this end, we first formulate the online scheduling problem as an energy-constrained Markov decision process (MDP). Then, considering the task arrival dynamics, we develop a novel deep risk-sensitive reinforcement learning algorithm. 
Specifically, the algorithm evaluates the risk, which measures the energy consumption that exceeds the constraint, for each state, and searches for the optimal parameter weighing the minimization of delay against that of risk while learning the optimal policy. Extensive simulation results demonstrate that the proposed algorithm can reduce the task processing delay by up to 30% compared to probabilistic configuration methods while satisfying the UAV energy capacity constraint.
Submitted 3 October, 2020; originally announced October 2020.
Comments: 14 pages, 8 figures
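
The risk-sensitive formulation above penalizes energy use beyond the UAV's budget while still minimizing delay. As a hedged illustration only, the function below combines a delay cost with an energy-overflow risk term through a weighting parameter; the linear combination and all symbols (delay, energy_used, energy_budget, lam) are assumptions for this sketch, not the paper's exact objective.

```python
def risk_sensitive_cost(delay: float, energy_used: float,
                        energy_budget: float, lam: float = 1.0) -> float:
    """Per-step cost = task delay + lam * risk, where risk is the energy
    consumption exceeding the UAV's budget (zero if within budget)."""
    risk = max(0.0, energy_used - energy_budget)
    return delay + lam * risk

# within budget: cost equals the delay; over budget: the overflow is penalized
print(risk_sensitive_cost(delay=2.5, energy_used=0.8, energy_budget=1.0))         # 2.5
print(risk_sensitive_cost(delay=2.5, energy_used=1.4, energy_budget=1.0, lam=5))  # 4.5
```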