CINXE.COM
Search | arXiv e-print repository
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">

  <!-- new favicon config and versions by realfavicongenerator.net -->
  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png">
  <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png">
  <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png">
  <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest">
  <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b">
  <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico">
  <meta name="msapplication-TileColor" content="#b31b1b">
  <!-- NOTE(review): unlike the other icon resources this path is relative; confirm it resolves where intended. -->
  <meta name="msapplication-config" content="images/icons/browserconfig.xml">
  <meta name="theme-color" content="#b31b1b">
  <!-- end favicon config -->

  <title>Search | arXiv e-print repository</title>

  <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script>
  <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css">

  <!--
    MathJax configuration, read by the MathJax.js loader that follows.
    ignoreClass '.*' combined with processClass 'mathjax.*' restricts typesetting
    to elements that carry a class starting with "mathjax".
  -->
  <script type="text/x-mathjax-config">
    MathJax.Hub.Config({
      messageStyle: "none",
      extensions: ["tex2jax.js"],
      jax: ["input/TeX", "output/HTML-CSS"],
      tex2jax: {
        inlineMath: [ ['$','$'], ["\\(","\\)"] ],
        displayMath: [ ['$$','$$'], ["\\[","\\]"] ],
        processEscapes: true,
        ignoreClass: '.*',
        processClass: 'mathjax.*'
      },
      TeX: {
        extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"],
        noErrors: {
          inlineDelimiters: ["$","$"],
          multiLine: false,
          style: {
            "font-size": "normal",
            "border": ""
          }
        }
      },
      "HTML-CSS": { availableFonts: ["TeX"] }
    });
  </script>
  <!-- Explicit https scheme, matching every other static.arxiv.org asset on this page. -->
  <script src="https://static.arxiv.org/MathJax-2.7.3/MathJax.js"></script>
  <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script>

  <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css">
  <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css">
  <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script>
  <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script>

  <style>
    /* NOTE(review): "radio" is not an HTML element name, so this selector presumably
       never matches anything; confirm the intended target before relying on it. */
    radio#cf-customfield_11400 {
      display: none;
    }
  </style>
</head>
<body>
<header><a href="#main-container" class="is-sr-only">Skip to main content</a>
  <!-- contains Cornell logo and sponsor statement -->
  <div class="attribution level is-marginless">
    <div class="level-left">
      <!-- The image's alt text is what names this link for assistive technology. -->
      <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200"></a>
    </div>
    <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br>
      the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors.
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div>
  </div>

  <!-- contains arXiv identity and search bar -->
  <div class="identity level is-marginless">
    <div class="level-left">
      <div class="level-item">
        <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo">
          <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" alt="arxiv logo" width="85" style="width:85px;">
        </a>
      </div>
    </div>
    <div class="search-block level-right">
      <!-- Compact header search; the hidden "source" field is submitted with the value "header". -->
      <form class="level-item mini-search" method="GET" action="https://arxiv.org/search">
        <div class="field has-addons">
          <div class="control">
            <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms">
            <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p>
          </div>
          <div class="control">
            <div class="select is-small">
              <select name="searchtype" aria-label="Field to search">
                <option value="all" selected>All fields</option>
                <option value="title">Title</option>
                <option value="author">Author</option>
                <option value="abstract">Abstract</option>
                <option value="comments">Comments</option>
                <option value="journal_ref">Journal reference</option>
                <option value="acm_class">ACM classification</option>
                <option value="msc_class">MSC classification</option>
                <option value="report_num">Report number</option>
                <option value="paper_id">arXiv identifier</option>
                <option value="doi">DOI</option>
                <option value="orcid">ORCID</option>
                <option value="author_id">arXiv author ID</option>
                <option value="help">Help pages</option>
                <option value="full_text">Full text</option>
              </select>
            </div>
          </div>
          <input type="hidden" name="source" value="header">
          <button class="button is-small is-cul-darker" type="submit">Search</button>
        </div>
      </form>
    </div>
  </div> <!-- closes identity -->

  <div class="container">
    <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu">
      <a href="https://arxiv.org/login">Login</a>
    </div>
  </div>
</header>

<main class="container" id="main-container">
  <div class="level is-marginless">
    <div class="level-left">
      <h1 class="title is-clearfix">
        Showing 1–50 of 180 results for author: <span class="mathjax">Hua, Y</span>
      </h1>
    </div>
    <div class="level-right is-hidden-mobile">
      <!-- feedback for mobile is moved to footer -->
      <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span>
    </div>
  </div>

  <div class="content">
    <!-- Main search form, scoped to the "cs" archive; role="search" exposes it as a search landmark. -->
    <form method="GET" action="/search/cs" role="search">
      Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Hua%2C+Y">Search in all archives.</a>
      <div class="field has-addons-tablet">
        <div class="control is-expanded">
          <label for="query" class="hidden-label">Search term or terms</label>
          <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Hua, Y">
        </div>
        <div class="select control is-medium">
          <label class="is-hidden" for="searchtype">Field</label>
          <select class="is-medium" id="searchtype" name="searchtype">
            <option value="all">All fields</option>
            <option value="title">Title</option>
            <option selected value="author">Author(s)</option>
            <option value="abstract">Abstract</option>
            <option value="comments">Comments</option>
            <option value="journal_ref">Journal reference</option>
            <option value="acm_class">ACM classification</option>
            <option value="msc_class">MSC classification</option>
            <option value="report_num">Report number</option>
            <option value="paper_id">arXiv identifier</option>
            <option value="doi">DOI</option>
            <option value="orcid">ORCID</option>
            <option value="license">License (URI)</option>
            <option value="author_id">arXiv author ID</option>
            <option value="help">Help pages</option>
            <option value="full_text">Full text</option>
          </select>
        </div>
        <div class="control">
          <button class="button is-link is-medium" type="submit">Search</button>
        </div>
      </div>
      <div class="field">
        <div class="control is-size-7">
          <label class="radio">
            <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts
          </label>
          <label class="radio">
            <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts
          </label>
        </div>
      </div>
      <div class="is-clearfix" style="height: 2.5em">
        <div class="is-pulled-right">
          <a href="/search/advanced?terms-0-term=Hua%2C+Y&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a>
        </div>
      </div>
      <!-- Carry the current sort order and page size along when a new search is submitted. -->
      <input type="hidden" name="order" value="-announced_date_first">
      <input type="hidden" name="size" value="50">
    </form>

    <div class="level breathe-horizontal">
      <div class="level-left">
        <!--
          Page-size / sort-order form. The display:none block repeats the current
          search fields so they are submitted together with the size and order selects.
          NOTE(review): this form posts to /search/ while the search form above posts
          to /search/cs; confirm that dropping the archive scope here is intended.
          NOTE(review): the ids searchtype, query, abstracts-0 and abstracts-1 below
          repeat ids already used in the search form above. Ids should be unique per
          document; confirm no script or label depends on them before renaming.
        -->
        <form method="GET" action="/search/">
          <div style="display: none;">
            <select id="searchtype" name="searchtype">
              <option value="all">All fields</option>
              <option value="title">Title</option>
              <option selected value="author">Author(s)</option>
              <option value="abstract">Abstract</option>
              <option value="comments">Comments</option>
              <option value="journal_ref">Journal reference</option>
              <option value="acm_class">ACM classification</option>
              <option value="msc_class">MSC classification</option>
              <option value="report_num">Report number</option>
              <option value="paper_id">arXiv identifier</option>
              <option value="doi">DOI</option>
              <option value="orcid">ORCID</option>
              <option value="license">License (URI)</option>
              <option value="author_id">arXiv author ID</option>
              <option value="help">Help pages</option>
              <option value="full_text">Full text</option>
            </select>
            <input id="query" name="query" type="text" value="Hua, Y">
            <ul id="abstracts">
              <li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li>
              <li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li>
            </ul>
          </div>
          <div class="box field is-grouped is-grouped-multiline level-item">
            <div class="control">
              <span class="select is-small">
                <select id="size" name="size">
                  <option value="25">25</option>
                  <option selected value="50">50</option>
                  <option value="100">100</option>
                  <option value="200">200</option>
                </select>
              </span>
              <label for="size">results per page</label>.
</div>
<div class="control">
  <label for="order">Sort results by</label>
  <span class="select is-small">
    <select id="order" name="order">
      <option selected value="-announced_date_first">Announcement date (newest first)</option>
      <option value="announced_date_first">Announcement date (oldest first)</option>
      <option value="-submitted_date">Submission date (newest first)</option>
      <option value="submitted_date">Submission date (oldest first)</option>
      <option value="">Relevance</option>
    </select>
  </span>
</div>
<div class="control">
  <button class="button is-small is-link" type="submit">Go</button>
</div>
</div>
</form>
</div>
</div>

<!--
  Result pagination. Only the link for the page being shown (class "is-current")
  carries aria-current="page"; the remaining links are labelled as navigation targets.
-->
<nav class="pagination is-small is-centered breathe-horizontal" aria-label="pagination">
  <!-- First page: the "Previous" link has an empty href and is hidden via is-invisible. -->
  <a href="" class="pagination-previous is-invisible">Previous </a>
  <a href="/search/?searchtype=author&amp;query=Hua%2C+Y&amp;start=50" class="pagination-next">Next </a>
  <ul class="pagination-list">
    <li>
      <a href="/search/?searchtype=author&amp;query=Hua%2C+Y&amp;start=0" class="pagination-link is-current" aria-label="Page 1" aria-current="page">1 </a>
    </li>
    <li>
      <a href="/search/?searchtype=author&amp;query=Hua%2C+Y&amp;start=50" class="pagination-link" aria-label="Go to page 2">2 </a>
    </li>
    <li>
      <a href="/search/?searchtype=author&amp;query=Hua%2C+Y&amp;start=100" class="pagination-link" aria-label="Go to page 3">3 </a>
    </li>
    <li>
      <a href="/search/?searchtype=author&amp;query=Hua%2C+Y&amp;start=150" class="pagination-link" aria-label="Go to page 4">4 </a>
    </li>
  </ul>
</nav>

<!-- Search results; each list item is one arXiv entry. -->
<ol class="breathe-horizontal" start="1">
  <li class="arxiv-result">
    <div class="is-marginless">
      <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.07019">arXiv:2411.07019</a> <span> [<a href="https://arxiv.org/pdf/2411.07019">pdf</a>, <a href="https://arxiv.org/format/2411.07019">other</a>] </span> </p>
      <div class="tags is-inline-block">
        <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span>
        <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span>
      </div>
    </div>
    <p class="title is-5 mathjax">
      UniHR: Hierarchical Representation Learning for Unified Knowledge Graph Link Prediction
    </p>
    <p class="authors">
      <span class="search-hit">Authors:</span>
      <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Z">Zhiqiang Liu</a>,
      <a href="/search/cs?searchtype=author&amp;query=Chen%2C+M">Mingyang Chen</a>,
      <a href="/search/cs?searchtype=author&amp;query=Hua%2C+Y">Yin Hua</a>,
      <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Z">Zhuo Chen</a>,
      <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Z">Ziqi Liu</a>,
      <a href="/search/cs?searchtype=author&amp;query=Liang%2C+L">Lei Liang</a>,
      <a href="/search/cs?searchtype=author&amp;query=Chen%2C+H">Huajun Chen</a>,
      <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+W">Wen Zhang</a>
    </p>
    <p class="abstract mathjax">
      <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>:
      <!-- Truncated abstract, shown by default (display: inline). -->
      <span class="abstract-short has-text-grey-dark mathjax" id="2411.07019v1-abstract-short" style="display: inline;">
        Beyond-triple fact representations including hyper-relational facts with auxiliary key-value pairs, temporal facts with additional timestamps, and nested facts implying relationships between facts, are gaining significant attention. However, existing link prediction models are usually designed for one specific type of facts, making it difficult to generalize to other fact representations.
To overc… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07019v1-abstract-full').style.display = 'inline'; document.getElementById('2411.07019v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.07019v1-abstract-full" style="display: none;"> Beyond-triple fact representations including hyper-relational facts with auxiliary key-value pairs, temporal facts with additional timestamps, and nested facts implying relationships between facts, are gaining significant attention. However, existing link prediction models are usually designed for one specific type of facts, making it difficult to generalize to other fact representations. To overcome this limitation, we propose a Unified Hierarchical Representation learning framework (UniHR) for unified knowledge graph link prediction. It consists of a unified Hierarchical Data Representation (HiDR) module and a unified Hierarchical Structure Learning (HiSL) module as graph encoder. The HiDR module unifies hyper-relational KGs, temporal KGs, and nested factual KGs into triple-based representations. Then HiSL incorporates intra-fact and inter-fact message passing, focusing on enhancing the semantic information within individual facts and enriching the structural information between facts. Experimental results across 7 datasets from 3 types of KGs demonstrate that our UniHR outperforms baselines designed for one specific kind of KG, indicating strong generalization capability of HiDR form and the effectiveness of HiSL module. Code and data are available at https://github.com/Lza12a/UniHR. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07019v1-abstract-full').style.display = 'none'; document.getElementById('2411.07019v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.06490">arXiv:2410.06490</a> <span> [<a href="https://arxiv.org/pdf/2410.06490">pdf</a>, <a href="https://arxiv.org/format/2410.06490">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> FedL2G: Learning to Guide Local Training in Heterogeneous Federated Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jianqing Zhang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yang Liu</a>, <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yang Hua</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+J">Jian Cao</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Q">Qiang Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.06490v1-abstract-short" style="display: inline;"> Data and model heterogeneity are two core issues in Heterogeneous Federated Learning (HtFL). 
In scenarios with heterogeneous model architectures, aggregating model parameters becomes infeasible, leading to the use of prototypes (i.e., class representative feature vectors) for aggregation and guidance. However, they still experience a mismatch between the extra guiding objective and the client's or… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.06490v1-abstract-full').style.display = 'inline'; document.getElementById('2410.06490v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.06490v1-abstract-full" style="display: none;"> Data and model heterogeneity are two core issues in Heterogeneous Federated Learning (HtFL). In scenarios with heterogeneous model architectures, aggregating model parameters becomes infeasible, leading to the use of prototypes (i.e., class representative feature vectors) for aggregation and guidance. However, they still experience a mismatch between the extra guiding objective and the client's original local objective when aligned with global prototypes. Thus, we propose a Federated Learning-to-Guide (FedL2G) method that adaptively learns to guide local training in a federated manner and ensures the extra guidance is beneficial to clients' original tasks. With theoretical guarantees, FedL2G efficiently implements the learning-to-guide process using only first-order derivatives w.r.t. model parameters and achieves a non-convex convergence rate of O(1/T). We conduct extensive experiments on two data heterogeneity and six model heterogeneity settings using 14 heterogeneous model architectures (e.g., CNNs and ViTs) to demonstrate FedL2G's superior performance compared to six counterparts. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.06490v1-abstract-full').style.display = 'none'; document.getElementById('2410.06490v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.02964">arXiv:2410.02964</a> <span> [<a href="https://arxiv.org/pdf/2410.02964">pdf</a>, <a href="https://arxiv.org/ps/2410.02964">ps</a>, <a href="https://arxiv.org/format/2410.02964">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> A Simple Method for Secret-Key Generation Between Mobile Users Across Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yingbo Hua</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.02964v1-abstract-short" style="display: inline;"> Two or more mobiles users can continuously superimpose sequences of bits chosen from different packets or files already exchanged and authenticated between themselves to continuously renew a secret key for continuous strengthening of their privacy and authentication. This accumulative, adaptable and additive (AAA) method is discussed in this paper. 
The equivocation to Eve of any bit in the generat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02964v1-abstract-full').style.display = 'inline'; document.getElementById('2410.02964v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.02964v1-abstract-full" style="display: none;"> Two or more mobiles users can continuously superimpose sequences of bits chosen from different packets or files already exchanged and authenticated between themselves to continuously renew a secret key for continuous strengthening of their privacy and authentication. This accumulative, adaptable and additive (AAA) method is discussed in this paper. The equivocation to Eve of any bit in the generated key by the AAA method equals to the probability that not all corresponding independent bits exchanged between the users are intercepted by Eve. This performance, achieved without using any knowledge of non-stationary probabilities of bits being intercepted by Eve, is compared to an established capacity achievable using that knowledge. A secrecy robustness of the AAA method against some correlations known to Eve is also discussed. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02964v1-abstract-full').style.display = 'none'; document.getElementById('2410.02964v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.19753">arXiv:2409.19753</a> <span> [<a href="https://arxiv.org/pdf/2409.19753">pdf</a>, <a href="https://arxiv.org/format/2409.19753">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> CoTKR: Chain-of-Thought Enhanced Knowledge Rewriting for Complex Knowledge Graph Question Answering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yike Wu</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yi Huang</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+N">Nan Hu</a>, <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yuncheng Hua</a>, <a href="/search/cs?searchtype=author&query=Qi%2C+G">Guilin Qi</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jiaoyan Chen</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+J+Z">Jeff Z. Pan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.19753v2-abstract-short" style="display: inline;"> Recent studies have explored the use of Large Language Models (LLMs) with Retrieval Augmented Generation (RAG) for Knowledge Graph Question Answering (KGQA). They typically require rewriting retrieved subgraphs into natural language formats comprehensible to LLMs. 
However, when tackling complex questions, the knowledge rewritten by existing methods may include irrelevant information, omit crucial… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.19753v2-abstract-full').style.display = 'inline'; document.getElementById('2409.19753v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.19753v2-abstract-full" style="display: none;"> Recent studies have explored the use of Large Language Models (LLMs) with Retrieval Augmented Generation (RAG) for Knowledge Graph Question Answering (KGQA). They typically require rewriting retrieved subgraphs into natural language formats comprehensible to LLMs. However, when tackling complex questions, the knowledge rewritten by existing methods may include irrelevant information, omit crucial details, or fail to align with the question's semantics. To address them, we propose a novel rewriting method CoTKR, Chain-of-Thought Enhanced Knowledge Rewriting, for generating reasoning traces and corresponding knowledge in an interleaved manner, thereby mitigating the limitations of single-step knowledge rewriting. Additionally, to bridge the preference gap between the knowledge rewriter and the question answering (QA) model, we propose a training strategy PAQAF, Preference Alignment from Question Answering Feedback, for leveraging feedback from the QA model to further optimize the knowledge rewriter. We conduct experiments using various LLMs across several KGQA benchmarks. Experimental results demonstrate that, compared with previous knowledge rewriting methods, CoTKR generates the most beneficial knowledge representation for QA models, which significantly improves the performance of LLMs in KGQA. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.19753v2-abstract-full').style.display = 'none'; document.getElementById('2409.19753v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.15680">arXiv:2409.15680</a> <span> [<a href="https://arxiv.org/pdf/2409.15680">pdf</a>, <a href="https://arxiv.org/format/2409.15680">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> </div> </div> <p class="title is-5 mathjax"> Distributed Online Bandit Nonconvex Optimization with One-Point Residual Feedback via Dynamic Regret </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Youqing Hua</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+S">Shuai Liu</a>, <a href="/search/cs?searchtype=author&query=Hong%2C+Y">Yiguang Hong</a>, <a href="/search/cs?searchtype=author&query=Johansson%2C+K+H">Karl Henrik Johansson</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+G">Guangchen Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.15680v1-abstract-short" style="display: inline;"> This paper considers the distributed online bandit optimization problem with 
nonconvex loss functions over a time-varying digraph. This problem can be viewed as a repeated game between a group of online players and an adversary. At each round, each player selects a decision from the constraint set, and then the adversary assigns an arbitrary, possibly nonconvex, loss function to this player. Only… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.15680v1-abstract-full').style.display = 'inline'; document.getElementById('2409.15680v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.15680v1-abstract-full" style="display: none;"> This paper considers the distributed online bandit optimization problem with nonconvex loss functions over a time-varying digraph. This problem can be viewed as a repeated game between a group of online players and an adversary. At each round, each player selects a decision from the constraint set, and then the adversary assigns an arbitrary, possibly nonconvex, loss function to this player. Only the loss value at the current round, rather than the entire loss function or any other information (e.g. gradient), is privately revealed to the player. Players aim to minimize a sequence of global loss functions, which are the sum of local losses. We observe that traditional multi-point bandit algorithms are unsuitable for online optimization, where the data for the loss function are not all a priori, while the one-point bandit algorithms suffer from poor regret guarantees. To address these issues, we propose a novel one-point residual feedback distributed online algorithm. This algorithm estimates the gradient using residuals from two points, effectively reducing the regret bound while maintaining $\mathcal{O}(1)$ sampling complexity per iteration. We employ a rigorous metric, dynamic regret, to evaluate the algorithm's performance. 
By appropriately selecting the step size and smoothing parameters, we demonstrate that the expected dynamic regret of our algorithm is comparable to existing algorithms that use two-point feedback, provided the deviation in the objective function sequence and the path length of the minimization grows sublinearly. Finally, we validate the effectiveness of the proposed algorithm through numerical simulations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.15680v1-abstract-full').style.display = 'none'; document.getElementById('2409.15680v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.13137">arXiv:2409.13137</a> <span> [<a href="https://arxiv.org/pdf/2409.13137">pdf</a>, <a href="https://arxiv.org/format/2409.13137">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Interpret the Predictions of Deep Networks via Re-Label Distillation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yingying Hua</a>, <a href="/search/cs?searchtype=author&query=Ge%2C+S">Shiming Ge</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+D">Daichi Zhang</a> </p> <p class="abstract mathjax"> <span 
class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.13137v1-abstract-short" style="display: inline;"> Interpreting the predictions of a black-box deep network can facilitate the reliability of its deployment. In this work, we propose a re-label distillation approach to learn a direct map from the input to the prediction in a self-supervision manner. The image is projected into a VAE subspace to generate some synthetic images by randomly perturbing its latent vector. Then, these synthetic images ca… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.13137v1-abstract-full').style.display = 'inline'; document.getElementById('2409.13137v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.13137v1-abstract-full" style="display: none;"> Interpreting the predictions of a black-box deep network can facilitate the reliability of its deployment. In this work, we propose a re-label distillation approach to learn a direct map from the input to the prediction in a self-supervision manner. The image is projected into a VAE subspace to generate some synthetic images by randomly perturbing its latent vector. Then, these synthetic images can be annotated into one of two classes by identifying whether their labels shift. After that, using the labels annotated by the deep network as teacher, a linear student model is trained to approximate the annotations by mapping these synthetic images to the classes. In this manner, these re-labeled synthetic images can well describe the local classification mechanism of the deep network, and the learned student can provide a more intuitive explanation towards the predictions. Extensive experiments verify the effectiveness of our approach qualitatively and quantitatively. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.13137v1-abstract-full').style.display = 'none'; document.getElementById('2409.13137v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published by IEEE ICME 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.11785">arXiv:2409.11785</a> <span> [<a href="https://arxiv.org/pdf/2409.11785">pdf</a>, <a href="https://arxiv.org/format/2409.11785">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Distilling Channels for Efficient Deep Tracking </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ge%2C+S">Shiming Ge</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+Z">Zhao Luo</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+C">Chunhui Zhang</a>, <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yingying Hua</a>, <a href="/search/cs?searchtype=author&query=Tao%2C+D">Dacheng Tao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.11785v1-abstract-short" style="display: inline;"> Deep trackers have proven success in visual 
tracking. Typically, these trackers employ optimally pre-trained deep networks to represent all diverse objects with multi-channel features from some fixed layers. The deep networks employed are usually trained to extract rich knowledge from massive data used in object classification and so they are capable of representing generic objects very well. However… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.11785v1-abstract-full').style.display = 'inline'; document.getElementById('2409.11785v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.11785v1-abstract-full" style="display: none;"> Deep trackers have proven success in visual tracking. Typically, these trackers employ optimally pre-trained deep networks to represent all diverse objects with multi-channel features from some fixed layers. The deep networks employed are usually trained to extract rich knowledge from massive data used in object classification and so they are capable of representing generic objects very well. However, these networks are too complex to represent a specific moving object, leading to poor generalization as well as high computational and memory costs. This paper presents a novel and general framework termed channel distillation to facilitate deep trackers. To validate the effectiveness of channel distillation, we take discriminative correlation filter (DCF) and ECO for example. We demonstrate that an integrated formulation can turn feature compression, response map generation, and model update into a unified energy minimization problem to adaptively select informative feature channels that improve the efficacy of tracking moving objects on the fly. Channel distillation can accurately extract good channels, alleviating the influence of noisy channels and generally reducing the number of channels, as well as adaptively generalizing to different channels and networks. 
The resulting deep tracker is accurate, fast, and has low memory requirements. Extensive experimental evaluations on popular benchmarks clearly demonstrate the effectiveness and generalizability of our framework. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.11785v1-abstract-full').style.display = 'none'; document.getElementById('2409.11785v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published by IEEE TIP 2020</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.05384">arXiv:2409.05384</a> <span> [<a href="https://arxiv.org/pdf/2409.05384">pdf</a>, <a href="https://arxiv.org/format/2409.05384">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> </div> </div> <p class="title is-5 mathjax"> Look One and More: Distilling Hybrid Order Relational Knowledge for Cross-Resolution Image Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ge%2C+S">Shiming Ge</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+K">Kangkai Zhang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+H">Haolin Liu</a>, <a 
href="/search/cs?searchtype=author&query=Hua%2C+Y">Yingying Hua</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+S">Shengwei Zhao</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+X">Xin Jin</a>, <a href="/search/cs?searchtype=author&query=Wen%2C+H">Hao Wen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.05384v1-abstract-short" style="display: inline;"> In spite of great success in many image recognition tasks achieved by recent deep models, directly applying them to recognize low-resolution images may suffer from low accuracy due to the missing of informative details during resolution degradation. However, these images are still recognizable for subjects who are familiar with the corresponding high-resolution ones. Inspired by that, we propose a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.05384v1-abstract-full').style.display = 'inline'; document.getElementById('2409.05384v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.05384v1-abstract-full" style="display: none;"> In spite of great success in many image recognition tasks achieved by recent deep models, directly applying them to recognize low-resolution images may suffer from low accuracy due to the missing of informative details during resolution degradation. However, these images are still recognizable for subjects who are familiar with the corresponding high-resolution ones. Inspired by that, we propose a teacher-student learning approach to facilitate low-resolution image recognition via hybrid order relational knowledge distillation. 
The approach refers to three streams: the teacher stream is pretrained to recognize high-resolution images in high accuracy, the student stream is learned to identify low-resolution images by mimicking the teacher's behaviors, and the extra assistant stream is introduced as bridge to help knowledge transfer across the teacher to the student. To extract sufficient knowledge for reducing the loss in accuracy, the learning of student is supervised with multiple losses, which preserves the similarities in various order relational structures. In this way, the capability of recovering missing details of familiar low-resolution images can be effectively enhanced, leading to a better knowledge transfer. Extensive experiments on metric learning, low-resolution image classification and low-resolution face recognition tasks show the effectiveness of our approach, while taking reduced models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.05384v1-abstract-full').style.display = 'none'; document.getElementById('2409.05384v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by AAAI 2020</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.05331">arXiv:2409.05331</a> <span> [<a href="https://arxiv.org/pdf/2409.05331">pdf</a>, <a href="https://arxiv.org/format/2409.05331">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Towards Practical Overlay Networks for Decentralized Federated Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yifan Hua</a>, <a href="/search/cs?searchtype=author&query=Pang%2C+J">Jinlong Pang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xiaoxue Zhang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yi Liu</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+X">Xiaofeng Shi</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+B">Bao Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yang Liu</a>, <a href="/search/cs?searchtype=author&query=Qian%2C+C">Chen Qian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.05331v1-abstract-short" style="display: inline;"> Decentralized federated learning (DFL) uses peer-to-peer communication to avoid the single point of failure problem in federated learning and has been considered an attractive solution for machine learning tasks on distributed devices. 
We provide the first solution to a fundamental network problem of DFL: what overlay network should DFL use to achieve fast training of highly accurate models, low c… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.05331v1-abstract-full').style.display = 'inline'; document.getElementById('2409.05331v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.05331v1-abstract-full" style="display: none;"> Decentralized federated learning (DFL) uses peer-to-peer communication to avoid the single point of failure problem in federated learning and has been considered an attractive solution for machine learning tasks on distributed devices. We provide the first solution to a fundamental network problem of DFL: what overlay network should DFL use to achieve fast training of highly accurate models, low communication, and decentralized construction and maintenance? Overlay topologies of DFL have been investigated, but no existing DFL topology includes decentralized protocols for network construction and topology maintenance. Without these protocols, DFL cannot run in practice. This work presents an overlay network, called FedLay, which provides fast training and low communication cost for practical DFL. FedLay is the first solution for constructing near-random regular topologies in a decentralized manner and maintaining the topologies under node joins and failures. Experiments based on prototype implementation and simulations show that FedLay achieves the fastest model convergence and highest accuracy on real datasets compared to existing DFL solutions while incurring small communication costs and being resilient to node joins and failures. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.05331v1-abstract-full').style.display = 'none'; document.getElementById('2409.05331v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.11288">arXiv:2408.11288</a> <span> [<a href="https://arxiv.org/pdf/2408.11288">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Applying and Evaluating Large Language Models in Mental Health Care: A Scoping Review of Human-Assessed Generative Tasks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yining Hua</a>, <a href="/search/cs?searchtype=author&query=Na%2C+H">Hongbin Na</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zehan Li</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+F">Fenglin Liu</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+X">Xiao Fang</a>, <a href="/search/cs?searchtype=author&query=Clifton%2C+D">David Clifton</a>, <a href="/search/cs?searchtype=author&query=Torous%2C+J">John Torous</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.11288v1-abstract-short" style="display: inline;"> Large language models (LLMs) are emerging as promising tools for mental health care, offering scalable support through their ability to generate human-like responses. 
However, the effectiveness of these models in clinical settings remains unclear. This scoping review aimed to assess the current generative applications of LLMs in mental health care, focusing on studies where these models were teste… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.11288v1-abstract-full').style.display = 'inline'; document.getElementById('2408.11288v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.11288v1-abstract-full" style="display: none;"> Large language models (LLMs) are emerging as promising tools for mental health care, offering scalable support through their ability to generate human-like responses. However, the effectiveness of these models in clinical settings remains unclear. This scoping review aimed to assess the current generative applications of LLMs in mental health care, focusing on studies where these models were tested with human participants in real-world scenarios. A systematic search across APA PsycNet, Scopus, PubMed, and Web of Science identified 726 unique articles, of which 17 met the inclusion criteria. These studies encompassed applications such as clinical assistance, counseling, therapy, and emotional support. However, the evaluation methods were often non-standardized, with most studies relying on ad hoc scales that limit comparability and robustness. Privacy, safety, and fairness were also frequently underexplored. Moreover, reliance on proprietary models, such as OpenAI's GPT series, raises concerns about transparency and reproducibility. While LLMs show potential in expanding mental health care access, especially in underserved areas, the current evidence does not fully support their use as standalone interventions. More rigorous, standardized evaluations and ethical oversight are needed to ensure these tools can be safely and effectively integrated into clinical practice. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.11288v1-abstract-full').style.display = 'none'; document.getElementById('2408.11288v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.07096">arXiv:2408.07096</a> <span> [<a href="https://arxiv.org/pdf/2408.07096">pdf</a>, <a href="https://arxiv.org/format/2408.07096">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.14778/3685800.3685900">10.14778/3685800.3685900 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> OFL-W3: A One-shot Federated Learning System on Web 3.0 </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jiang%2C+L">Linshan Jiang</a>, <a href="/search/cs?searchtype=author&query=Duan%2C+M">Moming Duan</a>, <a href="/search/cs?searchtype=author&query=He%2C+B">Bingsheng He</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Y">Yulin Sun</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+P">Peishen Yan</a>, <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yang Hua</a>, <a href="/search/cs?searchtype=author&query=Song%2C+T">Tao Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.07096v1-abstract-short" style="display: inline;"> Federated Learning (FL) addresses the challenges posed by data silos, which arise from privacy, security regulations, and ownership concerns. Despite these barriers, FL enables these isolated data repositories to participate in collaborative learning without compromising privacy or security. Concurrently, the advancement of blockchain technology and decentralized applications (DApps) within Web 3.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.07096v1-abstract-full').style.display = 'inline'; document.getElementById('2408.07096v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.07096v1-abstract-full" style="display: none;"> Federated Learning (FL) addresses the challenges posed by data silos, which arise from privacy, security regulations, and ownership concerns. Despite these barriers, FL enables these isolated data repositories to participate in collaborative learning without compromising privacy or security. Concurrently, the advancement of blockchain technology and decentralized applications (DApps) within Web 3.0 heralds a new era of transformative possibilities in web development. As such, incorporating FL into Web 3.0 paves the path for overcoming the limitations of data silos through collaborative learning. However, given the transaction speed constraints of core blockchains such as Ethereum (ETH) and the latency in smart contracts, employing one-shot FL, which minimizes client-server interactions in traditional FL to a single exchange, is considered more apt for Web 3.0 environments. This paper presents a practical one-shot FL system for Web 3.0, termed OFL-W3. OFL-W3 capitalizes on blockchain technology by utilizing smart contracts for managing transactions. 
Meanwhile, OFL-W3 utilizes the Inter-Planetary File System (IPFS) coupled with Flask communication, to facilitate backend server operations to use existing one-shot FL algorithms. With the integration of the incentive mechanism, OFL-W3 showcases an effective implementation of one-shot FL on Web 3.0, offering valuable insights and future directions for AI combined with Web 3.0 studies. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.07096v1-abstract-full').style.display = 'none'; document.getElementById('2408.07096v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">VLDB 24 demo paper</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.01417">arXiv:2408.01417</a> <span> [<a href="https://arxiv.org/pdf/2408.01417">pdf</a>, <a href="https://arxiv.org/format/2408.01417">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Talk Less, Interact Better: Evaluating In-context Conversational Adaptation in Multimodal LLMs </p> <p class="authors"> <span 
class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yilun Hua</a>, <a href="/search/cs?searchtype=author&query=Artzi%2C+Y">Yoav Artzi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.01417v1-abstract-short" style="display: inline;"> Humans spontaneously use increasingly efficient language as interactions progress, by adapting and forming ad-hoc conventions. This phenomenon has been studied extensively using reference games, showing properties of human language that go beyond relaying intents. It remains unexplored whether multimodal large language models (MLLMs) similarly increase communication efficiency during interactions,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.01417v1-abstract-full').style.display = 'inline'; document.getElementById('2408.01417v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.01417v1-abstract-full" style="display: none;"> Humans spontaneously use increasingly efficient language as interactions progress, by adapting and forming ad-hoc conventions. This phenomenon has been studied extensively using reference games, showing properties of human language that go beyond relaying intents. It remains unexplored whether multimodal large language models (MLLMs) similarly increase communication efficiency during interactions, and what mechanisms they may adopt for this purpose. We introduce ICCA, an automated framework to evaluate such conversational adaptation as an in-context behavior in MLLMs. We evaluate several state-of-the-art MLLMs, and observe that while they may understand the increasingly efficient language of their interlocutor, they do not spontaneously make their own language more efficient over time. 
This latter ability can only be elicited in some models (e.g., GPT-4) with heavy-handed prompting. This shows that this property of linguistic interaction does not arise from current training regimes, even though it is a common hallmark of human language. ICCA is available at https://github.com/lil-lab/ICCA. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.01417v1-abstract-full').style.display = 'none'; document.getElementById('2408.01417v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to COLM 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.15389">arXiv:2407.15389</a> <span> [<a href="https://arxiv.org/pdf/2407.15389">pdf</a>, <a href="https://arxiv.org/format/2407.15389">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> Poisoning with A Pill: Circumventing Detection in Federated Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Guo%2C+H">Hanxi Guo</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hao Wang</a>, <a href="/search/cs?searchtype=author&query=Song%2C+T">Tao 
Song</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+T">Tianhang Zheng</a>, <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yang Hua</a>, <a href="/search/cs?searchtype=author&query=Guan%2C+H">Haibing Guan</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xiangyu Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.15389v1-abstract-short" style="display: inline;"> Without direct access to the client's data, federated learning (FL) is well-known for its unique strength in data privacy protection among existing distributed machine learning techniques. However, its distributive and iterative nature makes FL inherently vulnerable to various poisoning attacks. To counteract these threats, extensive defenses have been proposed to filter out malicious clients, usi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.15389v1-abstract-full').style.display = 'inline'; document.getElementById('2407.15389v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.15389v1-abstract-full" style="display: none;"> Without direct access to the client's data, federated learning (FL) is well-known for its unique strength in data privacy protection among existing distributed machine learning techniques. However, its distributive and iterative nature makes FL inherently vulnerable to various poisoning attacks. To counteract these threats, extensive defenses have been proposed to filter out malicious clients, using various detection metrics. Based on our analysis of existing attacks and defenses, we find that there is a lack of attention to model redundancy. In neural networks, various model parameters contribute differently to the model's performance. 
However, existing attacks in FL manipulate all the model update parameters with the same strategy, making them easily detectable by common defenses. Meanwhile, the defenses also tend to analyze the overall statistical features of the entire model updates, leaving room for sophisticated attacks. Based on these observations, this paper proposes a generic and attack-agnostic augmentation approach designed to enhance the effectiveness and stealthiness of existing FL poisoning attacks against detection in FL, pointing out the inherent flaws of existing defenses and exposing the necessity of fine-grained FL security. Specifically, we employ a three-stage methodology that strategically constructs, generates, and injects poison (generated by existing attacks) into a pill (a tiny subnet with a novel structure) during the FL training, named as pill construction, pill poisoning, and pill injection accordingly. Extensive experimental results show that FL poisoning attacks enhanced by our method can bypass all the popular defenses, and can gain an up to 7x error rate increase, as well as on average a more than 2x error rate increase on both IID and non-IID data, in both cross-silo and cross-device FL systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.15389v1-abstract-full').style.display = 'none'; document.getElementById('2407.15389v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.18916">arXiv:2406.18916</a> <span> [<a href="https://arxiv.org/pdf/2406.18916">pdf</a>, <a href="https://arxiv.org/format/2406.18916">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> TrustUQA: A Trustful Framework for Unified Structured Data Question Answering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Wen Zhang</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+L">Long Jin</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Y">Yushan Zhu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jiaoyan Chen</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Z">Zhiwei Huang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Junjie Wang</a>, <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yin Hua</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+L">Lei Liang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+H">Huajun Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.18916v1-abstract-short" style="display: inline;"> Natural language question answering (QA) over structured data sources such as tables and knowledge graphs (KGs) have been widely investigated, for example with Large Language Models (LLMs). The main solutions include question to formal query parsing and retrieval-based answer generation. 
However, current methods of the former often suffer from weak generalization, failing to deal with multiple… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.18916v1-abstract-full').style.display = 'inline'; document.getElementById('2406.18916v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.18916v1-abstract-full" style="display: none;"> Natural language question answering (QA) over structured data sources such as tables and knowledge graphs (KGs) have been widely investigated, for example with Large Language Models (LLMs). The main solutions include question to formal query parsing and retrieval-based answer generation. However, current methods of the former often suffer from weak generalization, failing to deal with multiple sources simultaneously, while the latter is limited in trustfulness. In this paper, we propose UnifiedTQA, a trustful QA framework that can simultaneously support multiple types of structured data in a unified way. To this end, it adopts an LLM-friendly and unified knowledge representation method called Condition Graph (CG), and uses an LLM and demonstration-based two-level method for CG querying. For enhancement, it is also equipped with dynamic demonstration retrieval. We have evaluated UnifiedTQA with 5 benchmarks covering 3 types of structured data. It outperforms 2 existing unified structured data QA methods and in comparison with the baselines that are specific to a data type, it achieves state-of-the-art on 2 of them. Furthermore, we demonstrate the potential of our method for more general QA tasks, QA over mixed structured data and QA across structured data. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.18916v1-abstract-full').style.display = 'none'; document.getElementById('2406.18916v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.15490">arXiv:2406.15490</a> <span> [<a href="https://arxiv.org/pdf/2406.15490">pdf</a>, <a href="https://arxiv.org/format/2406.15490">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Causal Discovery Inspired Unsupervised Domain Adaptation for Emotion-Cause Pair Extraction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yuncheng Hua</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yujin Huang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+S">Shuo Huang</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+T">Tao Feng</a>, <a href="/search/cs?searchtype=author&query=Qu%2C+L">Lizhen Qu</a>, <a href="/search/cs?searchtype=author&query=Bain%2C+C">Chris Bain</a>, <a href="/search/cs?searchtype=author&query=Bassed%2C+R">Richard Bassed</a>, <a href="/search/cs?searchtype=author&query=Haffari%2C+G">Gholamreza Haffari</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.15490v1-abstract-short" style="display: inline;"> This paper tackles the task of emotion-cause pair extraction in the unsupervised domain adaptation setting. The problem is challenging as the distributions of the events causing emotions in target domains are dramatically different than those in source domains, despite the distributions of emotional expressions between domains are overlapped. Inspired by causal discovery, we propose a novel deep l… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.15490v1-abstract-full').style.display = 'inline'; document.getElementById('2406.15490v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.15490v1-abstract-full" style="display: none;"> This paper tackles the task of emotion-cause pair extraction in the unsupervised domain adaptation setting. The problem is challenging as the distributions of the events causing emotions in target domains are dramatically different than those in source domains, despite the distributions of emotional expressions between domains are overlapped. Inspired by causal discovery, we propose a novel deep latent model in the variational autoencoder (VAE) framework, which not only captures the underlying latent structures of data but also utilizes the easily transferable knowledge of emotions as the bridge to link the distributions of events in different domains. To facilitate knowledge transfer across domains, we also propose a novel variational posterior regularization technique to disentangle the latent representations of emotions from those of events in order to mitigate the damage caused by the spurious correlations related to the events in source domains. 
Through extensive experiments, we demonstrate that our model outperforms the strongest baseline by approximately 11.05% on a Chinese benchmark and 2.45% on an English benchmark in terms of weighted-average F1 score. The source code will be publicly available upon acceptance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.15490v1-abstract-full').style.display = 'none'; document.getElementById('2406.15490v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 6 figures, 4 tables; Under Review in EMNLP 2024</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2.4 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.10882">arXiv:2406.10882</a> <span> [<a href="https://arxiv.org/pdf/2406.10882">pdf</a>, <a href="https://arxiv.org/format/2406.10882">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> SCAR: Efficient Instruction-Tuning for Large Language Models via Style Consistency-Aware Response Ranking </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhuang Li</a>, <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yuncheng Hua</a>, <a href="/search/cs?searchtype=author&query=Vu%2C+T">Thuy-Trang Vu</a>, <a href="/search/cs?searchtype=author&query=Zhan%2C+H">Haolan 
Zhan</a>, <a href="/search/cs?searchtype=author&query=Qu%2C+L">Lizhen Qu</a>, <a href="/search/cs?searchtype=author&query=Haffari%2C+G">Gholamreza Haffari</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.10882v6-abstract-short" style="display: inline;"> Recent studies have shown that maintaining a consistent response style by human experts and enhancing data quality in training sets can significantly improve the performance of fine-tuned Large Language Models (LLMs) while reducing the number of training examples needed. However, the precise definition of style and the relationship between style, data quality, and LLM performance remains unclear.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.10882v6-abstract-full').style.display = 'inline'; document.getElementById('2406.10882v6-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.10882v6-abstract-full" style="display: none;"> Recent studies have shown that maintaining a consistent response style by human experts and enhancing data quality in training sets can significantly improve the performance of fine-tuned Large Language Models (LLMs) while reducing the number of training examples needed. However, the precise definition of style and the relationship between style, data quality, and LLM performance remains unclear. This research identifies two key stylistic elements in responses: linguistic form and semantic surprisal. We find that, among training data of comparable quality, higher consistency in these response elements leads to better LLM performance. Inspired by this, we introduce Style Consistency-Aware Response Ranking (SCAR), which automatically prioritizes instruction-response pairs in the training set based on their response stylistic consistency. 
By selecting the most style-consistent examples, sometimes as few as 0.7% of the full dataset, the fine-tuned LLMs can match or even surpass the performance of models trained on the entire dataset in coding and open-ended question-answering benchmarks. Code and data are available at https://github.com/zhuang-li/SCAR . <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.10882v6-abstract-full').style.display = 'none'; document.getElementById('2406.10882v6-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">27 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.10633">arXiv:2406.10633</a> <span> [<a href="https://arxiv.org/pdf/2406.10633">pdf</a>, <a href="https://arxiv.org/format/2406.10633">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> </div> </div> <p class="title is-5 mathjax"> fNeRF: High Quality Radiance Fields from Practical Cameras </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yi Hua</a>, <a href="/search/cs?searchtype=author&query=Lassner%2C+C">Christoph Lassner</a>, <a href="/search/cs?searchtype=author&query=Stoll%2C+C">Carsten Stoll</a>, <a 
href="/search/cs?searchtype=author&query=Matthews%2C+I">Iain Matthews</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.10633v1-abstract-short" style="display: inline;"> In recent years, the development of Neural Radiance Fields has enabled a previously unseen level of photo-realistic 3D reconstruction of scenes and objects from multi-view camera data. However, previous methods use an oversimplified pinhole camera model resulting in defocus blur being `baked' into the reconstructed radiance field. We propose a modification to the ray casting that leverages the opt… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.10633v1-abstract-full').style.display = 'inline'; document.getElementById('2406.10633v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.10633v1-abstract-full" style="display: none;"> In recent years, the development of Neural Radiance Fields has enabled a previously unseen level of photo-realistic 3D reconstruction of scenes and objects from multi-view camera data. However, previous methods use an oversimplified pinhole camera model resulting in defocus blur being `baked' into the reconstructed radiance field. We propose a modification to the ray casting that leverages the optics of lenses to enhance scene reconstruction in the presence of defocus blur. This allows us to improve the quality of radiance field reconstructions from the measurements of a practical camera with finite aperture. We show that the proposed model matches the defocus blur behavior of practical cameras more closely than pinhole models and other approximations of defocus blur models, particularly in the presence of partial occlusions. 
This allows us to achieve sharper reconstructions, improving the PSNR on validation of all-in-focus images, on both synthetic and real datasets, by up to 3 dB. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.10633v1-abstract-full').style.display = 'none'; document.getElementById('2406.10633v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.09838">arXiv:2406.09838</a> <span> [<a href="https://arxiv.org/pdf/2406.09838">pdf</a>, <a href="https://arxiv.org/format/2406.09838">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Vision-Language Models Meet Meteorology: Developing Models for Extreme Weather Events Detection with Heatmaps </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jian Chen</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+P">Peilin Zhou</a>, <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yining Hua</a>, <a href="/search/cs?searchtype=author&query=Chong%2C+D">Dading Chong</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+M">Meng Cao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yaowei Li</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+Z">Zixuan Yuan</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+B">Bing Zhu</a>, <a 
href="/search/cs?searchtype=author&query=Liang%2C+J">Junwei Liang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.09838v1-abstract-short" style="display: inline;"> Real-time detection and prediction of extreme weather protect human lives and infrastructure. Traditional methods rely on numerical threshold setting and manual interpretation of weather heatmaps with Geographic Information Systems (GIS), which can be slow and error-prone. Our research redefines Extreme Weather Events Detection (EWED) by framing it as a Visual Question Answering (VQA) problem, the… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.09838v1-abstract-full').style.display = 'inline'; document.getElementById('2406.09838v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.09838v1-abstract-full" style="display: none;"> Real-time detection and prediction of extreme weather protect human lives and infrastructure. Traditional methods rely on numerical threshold setting and manual interpretation of weather heatmaps with Geographic Information Systems (GIS), which can be slow and error-prone. Our research redefines Extreme Weather Events Detection (EWED) by framing it as a Visual Question Answering (VQA) problem, thereby introducing a more precise and automated solution. Leveraging Vision-Language Models (VLM) to simultaneously process visual and textual data, we offer an effective aid to enhance the analysis process of weather heatmaps. Our initial assessment of general-purpose VLMs (e.g., GPT-4-Vision) on EWED revealed poor performance, characterized by low accuracy and frequent hallucinations due to inadequate color differentiation and insufficient meteorological knowledge. 
To address these challenges, we introduce ClimateIQA, the first meteorological VQA dataset, which includes 8,760 wind gust heatmaps and 254,040 question-answer pairs covering four question types, both generated from the latest climate reanalysis data. We also propose Sparse Position and Outline Tracking (SPOT), an innovative technique that leverages OpenCV and K-Means clustering to capture and depict color contours in heatmaps, providing ClimateIQA with more accurate color spatial location information. Finally, we present Climate-Zoo, the first meteorological VLM collection, which adapts VLMs to meteorological applications using the ClimateIQA dataset. Experiment results demonstrate that models from Climate-Zoo substantially outperform state-of-the-art general VLMs, achieving an accuracy increase from 0% to over 90% in EWED verification. The datasets and models in this study are publicly available for future climate science research: https://github.com/AlexJJJChen/Climate-Zoo. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.09838v1-abstract-full').style.display = 'none'; document.getElementById('2406.09838v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.19931">arXiv:2405.19931</a> <span> [<a href="https://arxiv.org/pdf/2405.19931">pdf</a>, <a href="https://arxiv.org/format/2405.19931">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Exploring Diffusion Models' Corruption Stage in Few-Shot Fine-tuning and Mitigating with Bayesian Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wu%2C+X">Xiaoyu Wu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jiaru Zhang</a>, <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yang Hua</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+B">Bohan Lyu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hao Wang</a>, <a href="/search/cs?searchtype=author&query=Song%2C+T">Tao Song</a>, <a href="/search/cs?searchtype=author&query=Guan%2C+H">Haibing Guan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.19931v1-abstract-short" style="display: inline;"> Few-shot fine-tuning of Diffusion Models (DMs) is a key advancement, significantly reducing training costs and enabling personalized AI applications. 
However, we explore the training dynamics of DMs and observe an unanticipated phenomenon: during the training process, image fidelity initially improves, then unexpectedly deteriorates with the emergence of noisy patterns, only to recover later with… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.19931v1-abstract-full').style.display = 'inline'; document.getElementById('2405.19931v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.19931v1-abstract-full" style="display: none;"> Few-shot fine-tuning of Diffusion Models (DMs) is a key advancement, significantly reducing training costs and enabling personalized AI applications. However, we explore the training dynamics of DMs and observe an unanticipated phenomenon: during the training process, image fidelity initially improves, then unexpectedly deteriorates with the emergence of noisy patterns, only to recover later with severe overfitting. We term the stage with generated noisy patterns as corruption stage. To understand this corruption stage, we begin by theoretically modeling the one-shot fine-tuning scenario, and then extend this modeling to more general cases. Through this modeling, we identify the primary cause of this corruption stage: a narrowed learning distribution inherent in the nature of few-shot fine-tuning. To tackle this, we apply Bayesian Neural Networks (BNNs) on DMs with variational inference to implicitly broaden the learned distribution, and present that the learning target of the BNNs can be naturally regarded as an expectation of the diffusion loss and a further regularization with the pretrained DMs. This approach is highly compatible with current few-shot fine-tuning methods in DMs and does not introduce any extra inference costs. 
Experimental results demonstrate that our method significantly mitigates corruption, and improves the fidelity, quality and diversity of the generated images in both object-driven and subject-driven generation tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.19931v1-abstract-full').style.display = 'none'; document.getElementById('2405.19931v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint. Under review</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.16663">arXiv:2405.16663</a> <span> [<a href="https://arxiv.org/pdf/2405.16663">pdf</a>, <a href="https://arxiv.org/ps/2405.16663">ps</a>, <a href="https://arxiv.org/format/2405.16663">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Private Edge Density Estimation for Random Graphs: Optimal, Efficient and Robust </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+H">Hongjie Chen</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+J">Jingqiu Ding</a>, <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yiding Hua</a>, <a 
href="/search/cs?searchtype=author&query=Steurer%2C+D">David Steurer</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.16663v2-abstract-short" style="display: inline;"> We give the first polynomial-time, differentially node-private, and robust algorithm for estimating the edge density of Erdős-Rényi random graphs and their generalization, inhomogeneous random graphs. We further prove information-theoretical lower bounds, showing that the error rate of our algorithm is optimal up to logarithmic factors. Previous algorithms incur either exponential running time or… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.16663v2-abstract-full').style.display = 'inline'; document.getElementById('2405.16663v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.16663v2-abstract-full" style="display: none;"> We give the first polynomial-time, differentially node-private, and robust algorithm for estimating the edge density of Erdős-Rényi random graphs and their generalization, inhomogeneous random graphs. We further prove information-theoretical lower bounds, showing that the error rate of our algorithm is optimal up to logarithmic factors. Previous algorithms incur either exponential running time or suboptimal error rates. Two key ingredients of our algorithm are (1) a new sum-of-squares algorithm for robust edge density estimation, and (2) the reduction from privacy to robustness based on sum-of-squares exponential mechanisms due to Hopkins et al. (STOC 2023). 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.16663v2-abstract-full').style.display = 'none'; document.getElementById('2405.16663v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">fix minor typos; add missing references</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.09980">arXiv:2405.09980</a> <span> [<a href="https://arxiv.org/pdf/2405.09980">pdf</a>, <a href="https://arxiv.org/format/2405.09980">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> FinTextQA: A Dataset for Long-form Financial Question Answering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jian Chen</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+P">Peilin Zhou</a>, <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yining Hua</a>, <a href="/search/cs?searchtype=author&query=Loh%2C+Y">Yingxin Loh</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kehui Chen</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Ziyuan Li</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+B">Bing Zhu</a>, <a 
href="/search/cs?searchtype=author&query=Liang%2C+J">Junwei Liang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.09980v1-abstract-short" style="display: inline;"> Accurate evaluation of financial question answering (QA) systems necessitates a comprehensive dataset encompassing diverse question types and contexts. However, current financial QA datasets lack scope diversity and question complexity. This work introduces FinTextQA, a novel dataset for long-form question answering (LFQA) in finance. FinTextQA comprises 1,262 high-quality, source-attributed QA pa… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.09980v1-abstract-full').style.display = 'inline'; document.getElementById('2405.09980v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.09980v1-abstract-full" style="display: none;"> Accurate evaluation of financial question answering (QA) systems necessitates a comprehensive dataset encompassing diverse question types and contexts. However, current financial QA datasets lack scope diversity and question complexity. This work introduces FinTextQA, a novel dataset for long-form question answering (LFQA) in finance. FinTextQA comprises 1,262 high-quality, source-attributed QA pairs extracted and selected from finance textbooks and government agency websites. Moreover, we developed a Retrieval-Augmented Generation (RAG)-based LFQA system, comprising an embedder, retriever, reranker, and generator. A multi-faceted evaluation approach, including human ranking, automatic metrics, and GPT-4 scoring, was employed to benchmark the performance of different LFQA system configurations under heightened noisy conditions. 
The results indicate that: (1) Among all compared generators, Baichuan2-7B competes closely with GPT-3.5-turbo in accuracy score; (2) The most effective system configuration on our dataset involved setting the embedder, retriever, reranker, and generator as Ada2, Automated Merged Retrieval, Bge-Reranker-Base, and Baichuan2-7B, respectively; (3) models are less susceptible to noise after the length of contexts reaching a specific threshold. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.09980v1-abstract-full').style.display = 'none'; document.getElementById('2405.09980v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.00716">arXiv:2405.00716</a> <span> [<a href="https://arxiv.org/pdf/2405.00716">pdf</a>, <a href="https://arxiv.org/format/2405.00716">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Large Language Models in the Clinic: A Comprehensive Benchmark </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+F">Fenglin Liu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zheng Li</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+H">Hongjian Zhou</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+Q">Qingyu Yin</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+J">Jingfeng Yang</a>, <a 
href="/search/cs?searchtype=author&query=Tang%2C+X">Xianfeng Tang</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+C">Chen Luo</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+M">Ming Zeng</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+H">Haoming Jiang</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+Y">Yifan Gao</a>, <a href="/search/cs?searchtype=author&query=Nigam%2C+P">Priyanka Nigam</a>, <a href="/search/cs?searchtype=author&query=Nag%2C+S">Sreyashi Nag</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+B">Bing Yin</a>, <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yining Hua</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+X">Xuan Zhou</a>, <a href="/search/cs?searchtype=author&query=Rohanian%2C+O">Omid Rohanian</a>, <a href="/search/cs?searchtype=author&query=Thakur%2C+A">Anshul Thakur</a>, <a href="/search/cs?searchtype=author&query=Clifton%2C+L">Lei Clifton</a>, <a href="/search/cs?searchtype=author&query=Clifton%2C+D+A">David A. Clifton</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.00716v4-abstract-short" style="display: inline;"> The adoption of large language models (LLMs) to assist clinicians has attracted remarkable attention. Existing works mainly adopt the close-ended question-answering (QA) task with answer options for evaluation. However, many clinical decisions involve answering open-ended questions without pre-set options. To better understand LLMs in the clinic, we construct a benchmark ClinicBench. 
We first coll… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.00716v4-abstract-full').style.display = 'inline'; document.getElementById('2405.00716v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.00716v4-abstract-full" style="display: none;"> The adoption of large language models (LLMs) to assist clinicians has attracted remarkable attention. Existing works mainly adopt the close-ended question-answering (QA) task with answer options for evaluation. However, many clinical decisions involve answering open-ended questions without pre-set options. To better understand LLMs in the clinic, we construct a benchmark ClinicBench. We first collect eleven existing datasets covering diverse clinical language generation, understanding, and reasoning tasks. Furthermore, we construct six novel datasets and clinical tasks that are complex but common in real-world practice, e.g., open-ended decision-making, long document processing, and emerging drug analysis. We conduct an extensive evaluation of twenty-two LLMs under both zero-shot and few-shot settings. Finally, we invite medical experts to evaluate the clinical usefulness of LLMs. The benchmark data is available at https://github.com/AI-in-Health/ClinicBench. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.00716v4-abstract-full').style.display = 'none'; document.getElementById('2405.00716v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at EMNLP 2024 Main Conference</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.19007">arXiv:2404.19007</a> <span> [<a href="https://arxiv.org/pdf/2404.19007">pdf</a>, <a href="https://arxiv.org/format/2404.19007">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> How Did We Get Here? Summarizing Conversation Dynamics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yilun Hua</a>, <a href="/search/cs?searchtype=author&query=Chernogor%2C+N">Nicholas Chernogor</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+Y">Yuzhe Gu</a>, <a href="/search/cs?searchtype=author&query=Jeong%2C+S+J">Seoyeon Julie Jeong</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+M">Miranda Luo</a>, <a href="/search/cs?searchtype=author&query=Danescu-Niculescu-Mizil%2C+C">Cristian Danescu-Niculescu-Mizil</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.19007v1-abstract-short" style="display: inline;"> Throughout a conversation, the way participants interact with each other is in constant flux: their tones may change, they may resort to different strategies to convey their points, or they might alter their interaction patterns. 
An understanding of these dynamics can complement that of the actual facts and opinions discussed, offering a more holistic view of the trajectory of the conversation: ho… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.19007v1-abstract-full').style.display = 'inline'; document.getElementById('2404.19007v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.19007v1-abstract-full" style="display: none;"> Throughout a conversation, the way participants interact with each other is in constant flux: their tones may change, they may resort to different strategies to convey their points, or they might alter their interaction patterns. An understanding of these dynamics can complement that of the actual facts and opinions discussed, offering a more holistic view of the trajectory of the conversation: how it arrived at its current state and where it is likely heading. In this work, we introduce the task of summarizing the dynamics of conversations, by constructing a dataset of human-written summaries, and exploring several automated baselines. We evaluate whether such summaries can capture the trajectory of conversations via an established downstream task: forecasting whether an ongoing conversation will eventually derail into toxic behavior. We show that they help both humans and automated systems with this forecasting task. Humans make predictions three times faster, and with greater confidence, when reading the summaries than when reading the transcripts. Furthermore, automated forecasting systems are more accurate when constructing, and then predicting based on, summaries of conversation dynamics, compared to directly predicting on the transcripts. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.19007v1-abstract-full').style.display = 'none'; document.getElementById('2404.19007v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear in the Proceedings of NAACL 2024. Data available in ConvoKit https://convokit.cornell.edu/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.13504">arXiv:2404.13504</a> <span> [<a href="https://arxiv.org/pdf/2404.13504">pdf</a>, <a href="https://arxiv.org/format/2404.13504">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> IMO: Greedy Layer-Wise Sparse Representation Learning for Out-of-Distribution Text Classification with Pre-trained Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Feng%2C+T">Tao Feng</a>, <a href="/search/cs?searchtype=author&query=Qu%2C+L">Lizhen Qu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhuang Li</a>, <a href="/search/cs?searchtype=author&query=Zhan%2C+H">Haolan Zhan</a>, <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yuncheng Hua</a>, <a href="/search/cs?searchtype=author&query=Haffari%2C+G">Gholamreza Haffari</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" 
id="2404.13504v1-abstract-short" style="display: inline;"> Machine learning models have made incredible progress, but they still struggle when applied to examples from unseen domains. This study focuses on a specific problem of domain generalization, where a model is trained on one source domain and tested on multiple target domains that are unseen during training. We propose IMO: Invariant features Masks for Out-of-Distribution text classification, to ac… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.13504v1-abstract-full').style.display = 'inline'; document.getElementById('2404.13504v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.13504v1-abstract-full" style="display: none;"> Machine learning models have made incredible progress, but they still struggle when applied to examples from unseen domains. This study focuses on a specific problem of domain generalization, where a model is trained on one source domain and tested on multiple target domains that are unseen during training. We propose IMO: Invariant features Masks for Out-of-Distribution text classification, to achieve OOD generalization by learning invariant features. During training, IMO would learn sparse mask layers to remove irrelevant features for prediction, where the remaining features keep invariant. Additionally, IMO has an attention module at the token level to focus on tokens that are useful for prediction. Our comprehensive experiments show that IMO substantially outperforms strong baselines in terms of various evaluation metrics and settings. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.13504v1-abstract-full').style.display = 'none'; document.getElementById('2404.13504v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.15760">arXiv:2403.15760</a> <span> [<a href="https://arxiv.org/pdf/2403.15760">pdf</a>, <a href="https://arxiv.org/format/2403.15760">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> An Upload-Efficient Scheme for Transferring Knowledge From a Server-Side Pre-trained Generator to Clients in Heterogeneous Federated Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jianqing Zhang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yang Liu</a>, <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yang Hua</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+J">Jian Cao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.15760v2-abstract-short" style="display: inline;"> Heterogeneous Federated Learning (HtFL) enables task-specific knowledge sharing among clients with different model architectures while preserving privacy. 
Despite recent research progress, transferring knowledge in HtFL is still difficult due to data and model heterogeneity. To tackle this, we introduce a public pre-trained generator (e.g., StyleGAN or Stable Diffusion) as the bridge and propose a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.15760v2-abstract-full').style.display = 'inline'; document.getElementById('2403.15760v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.15760v2-abstract-full" style="display: none;"> Heterogeneous Federated Learning (HtFL) enables task-specific knowledge sharing among clients with different model architectures while preserving privacy. Despite recent research progress, transferring knowledge in HtFL is still difficult due to data and model heterogeneity. To tackle this, we introduce a public pre-trained generator (e.g., StyleGAN or Stable Diffusion) as the bridge and propose a new upload-efficient knowledge transfer scheme called Federated Knowledge-Transfer-Loop (FedKTL). It can produce task-related prototypical image-vector pairs via the generator's inference on the server. With these pairs, each client can transfer common knowledge from the generator to its local model through an additional supervised local task. We conduct extensive experiments on four datasets under two types of data heterogeneity with 14 heterogeneous models, including CNNs and ViTs. Results show that our FedKTL surpasses seven state-of-the-art methods by up to 7.31%. Moreover, our knowledge transfer scheme is applicable in cloud-edge scenarios with only one edge client. 
Code: https://github.com/TsingZ0/FedKTL <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.15760v2-abstract-full').style.display = 'none'; document.getElementById('2403.15760v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by CVPR2024. We have incorporated additional analysis for the Stable Diffusion experiments in Appendix A</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.12213">arXiv:2403.12213</a> <span> [<a href="https://arxiv.org/pdf/2403.12213">pdf</a>, <a href="https://arxiv.org/ps/2403.12213">ps</a>, <a href="https://arxiv.org/format/2403.12213">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Complexity">cs.CC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Private graphon estimation via sum-of-squares </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+H">Hongjie Chen</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+J">Jingqiu Ding</a>, <a 
href="/search/cs?searchtype=author&query=d%27Orsi%2C+T">Tommaso d'Orsi</a>, <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yiding Hua</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+C">Chih-Hung Liu</a>, <a href="/search/cs?searchtype=author&query=Steurer%2C+D">David Steurer</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.12213v2-abstract-short" style="display: inline;"> We develop the first pure node-differentially-private algorithms for learning stochastic block models and for graphon estimation with polynomial running time for any constant number of blocks. The statistical utility guarantees match those of the previous best information-theoretic (exponential-time) node-private mechanisms for these problems. The algorithm is based on an exponential mechanism for… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.12213v2-abstract-full').style.display = 'inline'; document.getElementById('2403.12213v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.12213v2-abstract-full" style="display: none;"> We develop the first pure node-differentially-private algorithms for learning stochastic block models and for graphon estimation with polynomial running time for any constant number of blocks. The statistical utility guarantees match those of the previous best information-theoretic (exponential-time) node-private mechanisms for these problems. The algorithm is based on an exponential mechanism for a score function defined in terms of a sum-of-squares relaxation whose level depends on the number of blocks. 
The key ingredients of our results are (1) a characterization of the distance between the block graphons in terms of a quadratic optimization over the polytope of doubly stochastic matrices, (2) a general sum-of-squares convergence result for polynomial optimization over arbitrary polytopes, and (3) a general approach to perform Lipschitz extensions of score functions as part of the sum-of-squares algorithmic paradigm. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.12213v2-abstract-full').style.display = 'none'; document.getElementById('2403.12213v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">71 pages, accepted to STOC 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.11162">arXiv:2403.11162</a> <span> [<a href="https://arxiv.org/pdf/2403.11162">pdf</a>, <a href="https://arxiv.org/format/2403.11162">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip 
is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> CGI-DM: Digital Copyright Authentication for Diffusion Models via Contrasting Gradient Inversion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wu%2C+X">Xiaoyu Wu</a>, <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yang Hua</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+C">Chumeng Liang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jiaru Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hao Wang</a>, <a href="/search/cs?searchtype=author&query=Song%2C+T">Tao Song</a>, <a href="/search/cs?searchtype=author&query=Guan%2C+H">Haibing Guan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.11162v1-abstract-short" style="display: inline;"> Diffusion Models (DMs) have evolved into advanced image generation tools, especially for few-shot generation where a pretrained model is fine-tuned on a small set of images to capture a specific style or object. Despite their success, concerns exist about potential copyright violations stemming from the use of unauthorized data in this process. In response, we present Contrasting Gradient Inversio… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.11162v1-abstract-full').style.display = 'inline'; document.getElementById('2403.11162v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.11162v1-abstract-full" style="display: none;"> Diffusion Models (DMs) have evolved into advanced image generation tools, especially for few-shot generation where a pretrained model is fine-tuned on a small set of images to capture a specific style or object. 
Despite their success, concerns exist about potential copyright violations stemming from the use of unauthorized data in this process. In response, we present Contrasting Gradient Inversion for Diffusion Models (CGI-DM), a novel method featuring vivid visual representations for digital copyright authentication. Our approach involves removing partial information of an image and recovering missing details by exploiting conceptual differences between the pretrained and fine-tuned models. We formulate the differences as KL divergence between latent variables of the two models when given the same input image, which can be maximized through Monte Carlo sampling and Projected Gradient Descent (PGD). The similarity between original and recovered images serves as a strong indicator of potential infringements. Extensive experiments on the WikiArt and Dreambooth datasets demonstrate the high accuracy of CGI-DM in digital copyright authentication, surpassing alternative validation techniques. Code implementation is available at https://github.com/Nicholas0228/Revelio. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.11162v1-abstract-full').style.display = 'none'; document.getElementById('2403.11162v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by CVPR 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.10051">arXiv:2403.10051</a> <span> [<a href="https://arxiv.org/pdf/2403.10051">pdf</a>, <a href="https://arxiv.org/format/2403.10051">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> </div> </div> <p class="title is-5 mathjax"> Accelerating Regular Path Queries over Graph Database with Processing-in-Memory </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ma%2C+R">Ruoyan Ma</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+S">Shengan Zheng</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+G">Guifeng Wang</a>, <a href="/search/cs?searchtype=author&query=Pu%2C+J">Jin Pu</a>, <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yifan Hua</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+W">Wentao Wang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+L">Linpeng Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.10051v1-abstract-short" style="display: inline;"> Regular path queries (RPQs) in graph databases are bottlenecked by the memory wall. Emerging processing-in-memory (PIM) technologies offer a promising solution to dispatch and execute path matching tasks in parallel within PIM modules. We present Moctopus, a PIM-based data management system for graph databases that supports efficient batch RPQs and graph updates. 
Moctopus employs a PIM-friendly dy… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.10051v1-abstract-full').style.display = 'inline'; document.getElementById('2403.10051v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.10051v1-abstract-full" style="display: none;"> Regular path queries (RPQs) in graph databases are bottlenecked by the memory wall. Emerging processing-in-memory (PIM) technologies offer a promising solution to dispatch and execute path matching tasks in parallel within PIM modules. We present Moctopus, a PIM-based data management system for graph databases that supports efficient batch RPQs and graph updates. Moctopus employs a PIM-friendly dynamic graph partitioning algorithm, which tackles graph skewness and preserves graph locality with low overhead for RPQ processing. Moctopus enables efficient graph update by amortizing the host CPU's update overhead to PIM modules. Evaluation of Moctopus demonstrates superiority over the state-of-the-art traditional graph database. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.10051v1-abstract-full').style.display = 'none'; document.getElementById('2403.10051v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.06438">arXiv:2403.06438</a> <span> [<a href="https://arxiv.org/pdf/2403.06438">pdf</a>, <a href="https://arxiv.org/format/2403.06438">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Unification of Secret Key Generation and Wiretap Channel Transmission </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yingbo Hua</a>, <a href="/search/cs?searchtype=author&query=Rahman%2C+M+S">Md Saydur Rahman</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.06438v1-abstract-short" style="display: inline;"> This paper presents further insights into a recently developed round-trip communication scheme called “Secret-message Transmission by Echoing Encrypted Probes (STEEP)”. A legitimate wireless channel between a multi-antenna user (Alice) and a single-antenna user (Bob) in the presence of a multi-antenna eavesdropper (Eve) is focused on. 
STEEP does not require full-duplex, channel reciprocity or Ev… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.06438v1-abstract-full').style.display = 'inline'; document.getElementById('2403.06438v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.06438v1-abstract-full" style="display: none;"> This paper presents further insights into a recently developed round-trip communication scheme called “Secret-message Transmission by Echoing Encrypted Probes (STEEP)”. A legitimate wireless channel between a multi-antenna user (Alice) and a single-antenna user (Bob) in the presence of a multi-antenna eavesdropper (Eve) is focused on. STEEP does not require full-duplex, channel reciprocity or Eve's channel state information, but is able to yield a positive secrecy rate in bits per channel use between Alice and Bob in every channel coherence period as long as Eve's receive channel is not noiseless. This secrecy rate does not diminish as coherence time increases. Various statistical behaviors of STEEP's secrecy capacity due to random channel fading are also illustrated. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.06438v1-abstract-full').style.display = 'none'; document.getElementById('2403.06438v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper has been accepted for presentation at IEEE ICC 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.02529">arXiv:2403.02529</a> <span> [<a href="https://arxiv.org/pdf/2403.02529">pdf</a>, <a href="https://arxiv.org/format/2403.02529">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/LWC.2024.3373716">10.1109/LWC.2024.3373716 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Secret-Key Capacity from MIMO Channel Probing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yingbo Hua</a>, <a href="/search/cs?searchtype=author&query=Maksud%2C+A">Ahmed Maksud</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.02529v1-abstract-short" style="display: inline;"> Revealing expressions of secret-key capacity (SKC) based on data sets from Gaussian MIMO channel probing are presented. It is shown that Maurer's upper and lower bounds on SKC coincide when the used data sets are produced from one-way channel probing. 
As channel coherence time increases, SKC in bits per probing channel use is always lower bounded by a positive value unless eavesdropper's observati… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.02529v1-abstract-full').style.display = 'inline'; document.getElementById('2403.02529v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.02529v1-abstract-full" style="display: none;"> Revealing expressions of secret-key capacity (SKC) based on data sets from Gaussian MIMO channel probing are presented. It is shown that Maurer's upper and lower bounds on SKC coincide when the used data sets are produced from one-way channel probing. As channel coherence time increases, SKC in bits per probing channel use is always lower bounded by a positive value unless eavesdropper's observations are noiseless, which is unlike SKC solely based on reciprocal channels. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.02529v1-abstract-full').style.display = 'none'; document.getElementById('2403.02529v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted for publication in IEEE Wireless Communications Letters</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.11541">arXiv:2402.11541</a> <span> [<a href="https://arxiv.org/pdf/2402.11541">pdf</a>, <a href="https://arxiv.org/format/2402.11541">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Large Language Models Can Better Understand Knowledge Graphs Than We Thought </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dai%2C+X">Xinbang Dai</a>, <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yuncheng Hua</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+T">Tongtong Wu</a>, <a href="/search/cs?searchtype=author&query=Sheng%2C+Y">Yang Sheng</a>, <a href="/search/cs?searchtype=author&query=Ji%2C+Q">Qiu Ji</a>, <a href="/search/cs?searchtype=author&query=Qi%2C+G">Guilin Qi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.11541v3-abstract-short" style="display: inline;"> As the parameter scale of large language models (LLMs) grows, jointly training knowledge graph (KG) embeddings with model parameters to enhance LLM capabilities becomes increasingly costly. Consequently, the community has shown interest in developing prompt strategies that effectively integrate KG information into LLMs. 
However, the format for incorporating KGs into LLMs lacks standardization; for… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.11541v3-abstract-full').style.display = 'inline'; document.getElementById('2402.11541v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.11541v3-abstract-full" style="display: none;"> As the parameter scale of large language models (LLMs) grows, jointly training knowledge graph (KG) embeddings with model parameters to enhance LLM capabilities becomes increasingly costly. Consequently, the community has shown interest in developing prompt strategies that effectively integrate KG information into LLMs. However, the format for incorporating KGs into LLMs lacks standardization; for instance, KGs can be transformed into linearized triples or natural language (NL) text. Current prompting methods often rely on a trial-and-error approach, leaving researchers with an incomplete understanding of which KG input format best facilitates LLM comprehension of KG content. To elucidate this, we design a series of experiments to explore LLMs' understanding of different KG input formats within the context of prompt engineering. Our analysis examines both literal and attention distribution levels. Through extensive experiments, we indicate a counter-intuitive phenomenon: when addressing fact-related questions, unordered linearized triples are more effective for LLMs' understanding of KGs compared to fluent NL text. Furthermore, noisy, incomplete, or marginally relevant subgraphs can still enhance LLM performance. Finally, different LLMs have distinct preferences for different formats of organizing unordered triples. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.11541v3-abstract-full').style.display = 'none'; document.getElementById('2402.11541v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2.4; I.2.7 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.11178">arXiv:2402.11178</a> <span> [<a href="https://arxiv.org/pdf/2402.11178">pdf</a>, <a href="https://arxiv.org/format/2402.11178">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> RENOVI: A Benchmark Towards Remediating Norm Violations in Socio-Cultural Conversations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhan%2C+H">Haolan Zhan</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhuang Li</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+X">Xiaoxi Kang</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+T">Tao Feng</a>, <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yuncheng Hua</a>, <a href="/search/cs?searchtype=author&query=Qu%2C+L">Lizhen Qu</a>, <a href="/search/cs?searchtype=author&query=Ying%2C+Y">Yi Ying</a>, <a 
href="/search/cs?searchtype=author&query=Chandra%2C+M+R">Mei Rianto Chandra</a>, <a href="/search/cs?searchtype=author&query=Rosalin%2C+K">Kelly Rosalin</a>, <a href="/search/cs?searchtype=author&query=Jureynolds%2C+J">Jureynolds Jureynolds</a>, <a href="/search/cs?searchtype=author&query=Sharma%2C+S">Suraj Sharma</a>, <a href="/search/cs?searchtype=author&query=Qu%2C+S">Shilin Qu</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+L">Linhao Luo</a>, <a href="/search/cs?searchtype=author&query=Soon%2C+L">Lay-Ki Soon</a>, <a href="/search/cs?searchtype=author&query=Azad%2C+Z+S">Zhaleh Semnani Azad</a>, <a href="/search/cs?searchtype=author&query=Zukerman%2C+I">Ingrid Zukerman</a>, <a href="/search/cs?searchtype=author&query=Haffari%2C+G">Gholamreza Haffari</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.11178v1-abstract-short" style="display: inline;"> Norm violations occur when individuals fail to conform to culturally accepted behaviors, which may lead to potential conflicts. Remediating norm violations requires social awareness and cultural sensitivity of the nuances at play. To equip interactive AI systems with a remediation ability, we offer ReNoVi - a large-scale corpus of 9,258 multi-turn dialogues annotated with social norms, as well as… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.11178v1-abstract-full').style.display = 'inline'; document.getElementById('2402.11178v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.11178v1-abstract-full" style="display: none;"> Norm violations occur when individuals fail to conform to culturally accepted behaviors, which may lead to potential conflicts. Remediating norm violations requires social awareness and cultural sensitivity of the nuances at play. 
To equip interactive AI systems with a remediation ability, we offer ReNoVi - a large-scale corpus of 9,258 multi-turn dialogues annotated with social norms, as well as define a sequence of tasks to help understand and remediate norm violations step by step. ReNoVi consists of two parts: 512 human-authored dialogues (real data), and 8,746 synthetic conversations generated by ChatGPT through prompt learning. While collecting sufficient human-authored data is costly, synthetic conversations provide suitable amounts of data to help mitigate the scarcity of training data, as well as the chance to assess the alignment between LLMs and humans in the awareness of social norms. We thus harness the power of ChatGPT to generate synthetic training data for our task. To ensure the quality of both human-authored and synthetic data, we follow a quality control protocol during data collection. Our experimental results demonstrate the importance of remediating norm violations in socio-cultural conversations, as well as the improvement in performance obtained from synthetic data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.11178v1-abstract-full').style.display = 'none'; document.getElementById('2402.11178v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">work in progress. 
15 pages, 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.09257">arXiv:2402.09257</a> <span> [<a href="https://arxiv.org/pdf/2402.09257">pdf</a>, <a href="https://arxiv.org/format/2402.09257">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1007/978-3-031-19833-5_17">10.1007/978-3-031-19833-5_17 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> TDViT: Temporal Dilated Video Transformer for Dense Video Tasks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sun%2C+G">Guanxiong Sun</a>, <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yang Hua</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+G">Guosheng Hu</a>, <a href="/search/cs?searchtype=author&query=Robertson%2C+N">Neil Robertson</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.09257v1-abstract-short" style="display: inline;"> Deep video models, for example, 3D CNNs or video transformers, have achieved promising performance on sparse video tasks, i.e., predicting one result per video. However, challenges arise when adapting existing deep video models to dense video tasks, i.e., predicting one result per frame. 
Specifically, these models are expensive for deployment, less effective when handling redundant frames, and dif… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.09257v1-abstract-full').style.display = 'inline'; document.getElementById('2402.09257v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.09257v1-abstract-full" style="display: none;"> Deep video models, for example, 3D CNNs or video transformers, have achieved promising performance on sparse video tasks, i.e., predicting one result per video. However, challenges arise when adapting existing deep video models to dense video tasks, i.e., predicting one result per frame. Specifically, these models are expensive for deployment, less effective when handling redundant frames, and difficult to capture long-range temporal correlations. To overcome these issues, we propose a Temporal Dilated Video Transformer (TDViT) that consists of carefully designed temporal dilated transformer blocks (TDTB). TDTB can efficiently extract spatiotemporal representations and effectively alleviate the negative effect of temporal redundancy. Furthermore, by using hierarchical TDTBs, our approach obtains an exponentially expanded temporal receptive field and therefore can model long-range dynamics. Extensive experiments are conducted on two different dense video benchmarks, i.e., ImageNet VID for video object detection and YouTube VIS for video instance segmentation. Excellent experimental results demonstrate the superior efficiency, effectiveness, and compatibility of our method. The code is available at https://github.com/guanxiongsun/vfe.pytorch. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.09257v1-abstract-full').style.display = 'none'; document.getElementById('2402.09257v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.09241">arXiv:2402.09241</a> <span> [<a href="https://arxiv.org/pdf/2402.09241">pdf</a>, <a href="https://arxiv.org/format/2402.09241">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1007/978-3-031-19833-5_1">10.1007/978-3-031-19833-5_1 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Efficient One-stage Video Object Detection by Exploiting Temporal Consistency </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sun%2C+G">Guanxiong Sun</a>, <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yang Hua</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+G">Guosheng Hu</a>, <a href="/search/cs?searchtype=author&query=Robertson%2C+N">Neil Robertson</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.09241v1-abstract-short" style="display: inline;"> Recently, one-stage detectors have 
achieved competitive accuracy and faster speed compared with traditional two-stage detectors on image data. However, in the field of video object detection (VOD), most existing VOD methods are still based on two-stage detectors. Moreover, directly adapting existing VOD methods to one-stage detectors introduces unaffordable computational costs. In this paper, we f… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.09241v1-abstract-full').style.display = 'inline'; document.getElementById('2402.09241v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.09241v1-abstract-full" style="display: none;"> Recently, one-stage detectors have achieved competitive accuracy and faster speed compared with traditional two-stage detectors on image data. However, in the field of video object detection (VOD), most existing VOD methods are still based on two-stage detectors. Moreover, directly adapting existing VOD methods to one-stage detectors introduces unaffordable computational costs. In this paper, we first analyse the computational bottlenecks of using one-stage detectors for VOD. Based on the analysis, we present a simple yet efficient framework to address the computational bottlenecks and achieve efficient one-stage VOD by exploiting the temporal consistency in video frames. Specifically, our method consists of a location-prior network to filter out background regions and a size-prior network to skip unnecessary computations on low-level feature maps for specific frames. We test our method on various modern one-stage detectors and conduct extensive experiments on the ImageNet VID dataset. Excellent experimental results demonstrate the superior effectiveness, efficiency, and compatibility of our method. The code is available at https://github.com/guanxiongsun/vfe.pytorch. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.09241v1-abstract-full').style.display = 'none'; document.getElementById('2402.09241v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.02574">arXiv:2402.02574</a> <span> [<a href="https://arxiv.org/pdf/2402.02574">pdf</a>, <a href="https://arxiv.org/format/2402.02574">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/ICCV51070.2023.01250">10.1109/ICCV51070.2023.01250 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Spatio-temporal Prompting Network for Robust Video Feature Extraction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sun%2C+G">Guanxiong Sun</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Chi Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhaoyu Zhang</a>, <a href="/search/cs?searchtype=author&query=Deng%2C+J">Jiankang Deng</a>, <a href="/search/cs?searchtype=author&query=Zafeiriou%2C+S">Stefanos Zafeiriou</a>, <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yang Hua</a> </p> <p 
class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.02574v1-abstract-short" style="display: inline;"> Frame quality deterioration is one of the main challenges in the field of video understanding. To compensate for the information loss caused by deteriorated frames, recent approaches exploit transformer-based integration modules to obtain spatio-temporal information. However, these integration modules are heavy and complex. Furthermore, each integration module is specifically tailored for its targ… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.02574v1-abstract-full').style.display = 'inline'; document.getElementById('2402.02574v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.02574v1-abstract-full" style="display: none;"> Frame quality deterioration is one of the main challenges in the field of video understanding. To compensate for the information loss caused by deteriorated frames, recent approaches exploit transformer-based integration modules to obtain spatio-temporal information. However, these integration modules are heavy and complex. Furthermore, each integration module is specifically tailored for its target task, making it difficult to generalise to multiple tasks. In this paper, we present a neat and unified framework, called Spatio-Temporal Prompting Network (STPN). It can efficiently extract robust and accurate video features by dynamically adjusting the input features in the backbone network. Specifically, STPN predicts several video prompts containing spatio-temporal information of neighbour frames. Then, these video prompts are prepended to the patch embeddings of the current frame as the updated input for video feature extraction. 
Moreover, STPN is easy to generalise to various video tasks because it does not contain task-specific modules. Without bells and whistles, STPN achieves state-of-the-art performance on three widely-used datasets for different video understanding tasks, i.e., ImageNetVID for video object detection, YouTubeVIS for video instance segmentation, and GOT-10k for visual object tracking. Code is available at https://github.com/guanxiongsun/vfe.pytorch. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.02574v1-abstract-full').style.display = 'none'; document.getElementById('2402.02574v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> 2023 International Conference on Computer Vision (ICCV) 13541-13551 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.01737">arXiv:2402.01737</a> <span> [<a href="https://arxiv.org/pdf/2402.01737">pdf</a>, <a href="https://arxiv.org/format/2402.01737">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Assistive Large Language Model Agents for Socially-Aware Negotiation Dialogues </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yuncheng Hua</a>, <a href="/search/cs?searchtype=author&query=Qu%2C+L">Lizhen Qu</a>, <a 
href="/search/cs?searchtype=author&query=Haffari%2C+G">Gholamreza Haffari</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.01737v2-abstract-short" style="display: inline;"> We develop assistive agents based on Large Language Models (LLMs) that aid interlocutors in business negotiations. Specifically, we simulate business negotiations by letting two LLM-based agents engage in role play. A third LLM acts as a remediator agent to rewrite utterances violating norms for improving negotiation outcomes. We introduce a simple tuning-free and label-free In-Context Learning (I… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.01737v2-abstract-full').style.display = 'inline'; document.getElementById('2402.01737v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.01737v2-abstract-full" style="display: none;"> We develop assistive agents based on Large Language Models (LLMs) that aid interlocutors in business negotiations. Specifically, we simulate business negotiations by letting two LLM-based agents engage in role play. A third LLM acts as a remediator agent to rewrite utterances violating norms for improving negotiation outcomes. We introduce a simple tuning-free and label-free In-Context Learning (ICL) method to identify high-quality ICL exemplars for the remediator, where we propose a novel selection criterion, called value impact, to measure the quality of the negotiation outcomes. We provide rich empirical evidence to demonstrate its effectiveness in negotiations across three different negotiation topics. The source code and the generated dataset will be publicly available upon acceptance. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.01737v2-abstract-full').style.display = 'none'; document.getElementById('2402.01737v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">25 pages, 3 figures, 13 tables; Under review in EMNLP 2024</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2.7 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.01736">arXiv:2402.01736</a> <span> [<a href="https://arxiv.org/pdf/2402.01736">pdf</a>, <a href="https://arxiv.org/format/2402.01736">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> SADAS: A Dialogue Assistant System Towards Remediating Norm Violations in Bilingual Socio-Cultural Conversations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yuncheng Hua</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhuang Li</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+L">Linhao Luo</a>, <a href="/search/cs?searchtype=author&query=Satriadi%2C+K+A">Kadek Ananta Satriadi</a>, <a 
href="/search/cs?searchtype=author&query=Feng%2C+T">Tao Feng</a>, <a href="/search/cs?searchtype=author&query=Zhan%2C+H">Haolan Zhan</a>, <a href="/search/cs?searchtype=author&query=Qu%2C+L">Lizhen Qu</a>, <a href="/search/cs?searchtype=author&query=Sharma%2C+S">Suraj Sharma</a>, <a href="/search/cs?searchtype=author&query=Zukerman%2C+I">Ingrid Zukerman</a>, <a href="/search/cs?searchtype=author&query=Semnani-Azad%2C+Z">Zhaleh Semnani-Azad</a>, <a href="/search/cs?searchtype=author&query=Haffari%2C+G">Gholamreza Haffari</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.01736v1-abstract-short" style="display: inline;"> In today's globalized world, bridging the cultural divide is more critical than ever for forging meaningful connections. The Socially-Aware Dialogue Assistant System (SADAS) is our answer to this global challenge, and it's designed to ensure that conversations between individuals from diverse cultural backgrounds unfold with respect and understanding. Our system's novel architecture includes: (1)… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.01736v1-abstract-full').style.display = 'inline'; document.getElementById('2402.01736v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.01736v1-abstract-full" style="display: none;"> In today's globalized world, bridging the cultural divide is more critical than ever for forging meaningful connections. The Socially-Aware Dialogue Assistant System (SADAS) is our answer to this global challenge, and it's designed to ensure that conversations between individuals from diverse cultural backgrounds unfold with respect and understanding. 
Our system's novel architecture includes: (1) identifying the categories of norms present in the dialogue, (2) detecting potential norm violations, (3) evaluating the severity of these violations, (4) implementing targeted remedies to rectify the breaches, and (5) articulating the rationale behind these corrective actions. We employ a series of State-Of-The-Art (SOTA) techniques to build different modules, and conduct numerous experiments to select the most suitable backbone model for each of the modules. We also design a human preference experiment to validate the overall performance of the system. We will open-source our system (including source code, tools and applications), hoping to advance future research. A demo video of our system can be found at: https://youtu.be/JqetWkfsejk. We have released our code and software at: https://github.com/AnonymousEACLDemo/SADAS. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.01736v1-abstract-full').style.display = 'none'; document.getElementById('2402.01736v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 2 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2.7 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.01097">arXiv:2402.01097</a> <span> [<a href="https://arxiv.org/pdf/2402.01097">pdf</a>, <a href="https://arxiv.org/format/2402.01097">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Let's Negotiate! A Survey of Negotiation Dialogue Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhan%2C+H">Haolan Zhan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yufei Wang</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+T">Tao Feng</a>, <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yuncheng Hua</a>, <a href="/search/cs?searchtype=author&query=Sharma%2C+S">Suraj Sharma</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhuang Li</a>, <a href="/search/cs?searchtype=author&query=Qu%2C+L">Lizhen Qu</a>, <a href="/search/cs?searchtype=author&query=Azad%2C+Z+S">Zhaleh Semnani Azad</a>, <a href="/search/cs?searchtype=author&query=Zukerman%2C+I">Ingrid Zukerman</a>, <a href="/search/cs?searchtype=author&query=Haffari%2C+G">Gholamreza Haffari</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.01097v1-abstract-short" style="display: inline;"> Negotiation is a crucial ability in human communication. 
Recently, there has been a resurgent research interest in negotiation dialogue systems, whose goal is to create intelligent agents that can assist people in resolving conflicts or reaching agreements. Although there have been many explorations into negotiation dialogue systems, a systematic review of this task has not been performed to date.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.01097v1-abstract-full').style.display = 'inline'; document.getElementById('2402.01097v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.01097v1-abstract-full" style="display: none;"> Negotiation is a crucial ability in human communication. Recently, there has been a resurgent research interest in negotiation dialogue systems, whose goal is to create intelligent agents that can assist people in resolving conflicts or reaching agreements. Although there have been many explorations into negotiation dialogue systems, a systematic review of this task has not been performed to date. We aim to fill this gap by investigating recent studies in the field of negotiation dialogue systems, and covering benchmarks, evaluations and methodologies within the literature. We also discuss potential future directions, including multi-modal, multi-party and cross-cultural negotiation scenarios. Our goal is to provide the community with a systematic overview of negotiation dialogue systems and to inspire future research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.01097v1-abstract-full').style.display = 'none'; document.getElementById('2402.01097v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by EACL 2024 (findings). arXiv admin note: substantial text overlap with arXiv:2212.09072</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.09923">arXiv:2401.09923</a> <span> [<a href="https://arxiv.org/pdf/2401.09923">pdf</a>, <a href="https://arxiv.org/format/2401.09923">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1609/aaai.v35i3.16365">10.1609/aaai.v35i3.16365 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> MAMBA: Multi-level Aggregation via Memory Bank for Video Object Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sun%2C+G">Guanxiong Sun</a>, <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yang Hua</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+G">Guosheng Hu</a>, <a href="/search/cs?searchtype=author&query=Robertson%2C+N">Neil Robertson</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.09923v2-abstract-short" style="display: inline;"> State-of-the-art video object detection methods maintain a memory structure, either a sliding window or a memory queue, to enhance the current frame using attention mechanisms. 
However, we argue that these memory structures are not efficient or sufficient because of two implied operations: (1) concatenating all features in memory for enhancement, leading to a heavy computational cost; (2) frame-wi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.09923v2-abstract-full').style.display = 'inline'; document.getElementById('2401.09923v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.09923v2-abstract-full" style="display: none;"> State-of-the-art video object detection methods maintain a memory structure, either a sliding window or a memory queue, to enhance the current frame using attention mechanisms. However, we argue that these memory structures are not efficient or sufficient because of two implied operations: (1) concatenating all features in memory for enhancement, leading to a heavy computational cost; (2) frame-wise memory updating, preventing the memory from capturing more temporal information. In this paper, we propose a multi-level aggregation architecture via memory bank called MAMBA. Specifically, our memory bank employs two novel operations to eliminate the disadvantages of existing methods: (1) light-weight key-set construction which can significantly reduce the computational cost; (2) fine-grained feature-wise updating strategy which enables our method to utilize knowledge from the whole video. To better enhance features from complementary levels, i.e., feature maps and proposals, we further propose a generalized enhancement operation (GEO) to aggregate multi-level features in a unified manner. We conduct extensive evaluations on the challenging ImageNetVID dataset. Compared with existing state-of-the-art methods, our method achieves superior performance in terms of both speed and accuracy. More remarkably, MAMBA achieves mAP of 83.7/84.6% at 12.6/9.1 FPS with ResNet-101. 
Code is available at https://github.com/guanxiongsun/vfe.pytorch. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.09923v2-abstract-full').style.display = 'none'; document.getElementById('2401.09923v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">update code url https://github.com/guanxiongsun/vfe.pytorch</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> In Proceedings of the AAAI Conference on Artificial Intelligence 2021 (Vol. 35, No. 3, pp. 
2620-2627) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.03230">arXiv:2401.03230</a> <span> [<a href="https://arxiv.org/pdf/2401.03230">pdf</a>, <a href="https://arxiv.org/format/2401.03230">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> FedTGP: Trainable Global Prototypes with Adaptive-Margin-Enhanced Contrastive Learning for Data and Model Heterogeneity in Federated Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jianqing Zhang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yang Liu</a>, <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yang Hua</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+J">Jian Cao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.03230v1-abstract-short" style="display: inline;"> Recently, Heterogeneous Federated Learning (HtFL) has attracted attention due to its ability to support heterogeneous models and data. 
To reduce the high communication cost of transmitting model parameters, a major challenge in HtFL, prototype-based HtFL methods are proposed to solely share class representatives, a.k.a, prototypes, among heterogeneous clients while maintaining the privacy of clien… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.03230v1-abstract-full').style.display = 'inline'; document.getElementById('2401.03230v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.03230v1-abstract-full" style="display: none;"> Recently, Heterogeneous Federated Learning (HtFL) has attracted attention due to its ability to support heterogeneous models and data. To reduce the high communication cost of transmitting model parameters, a major challenge in HtFL, prototype-based HtFL methods are proposed to solely share class representatives, a.k.a, prototypes, among heterogeneous clients while maintaining the privacy of clients' models. However, these prototypes are naively aggregated into global prototypes on the server using weighted averaging, resulting in suboptimal global knowledge which negatively impacts the performance of clients. To overcome this challenge, we introduce a novel HtFL approach called FedTGP, which leverages our Adaptive-margin-enhanced Contrastive Learning (ACL) to learn Trainable Global Prototypes (TGP) on the server. By incorporating ACL, our approach enhances prototype separability while preserving semantic meaning. Extensive experiments with twelve heterogeneous models demonstrate that our FedTGP surpasses state-of-the-art methods by up to 9.08% in accuracy while maintaining the communication and privacy advantages of prototype-based HtFL. Our code is available at https://github.com/TsingZ0/FedTGP. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.03230v1-abstract-full').style.display = 'none'; document.getElementById('2401.03230v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by AAAI2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.02984">arXiv:2401.02984</a> <span> [<a href="https://arxiv.org/pdf/2401.02984">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Large Language Models in Mental Health Care: a Scoping Review </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yining Hua</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+F">Fenglin Liu</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+K">Kailai Yang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zehan Li</a>, <a href="/search/cs?searchtype=author&query=Na%2C+H">Hongbin Na</a>, <a href="/search/cs?searchtype=author&query=Sheu%2C+Y">Yi-han Sheu</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+P">Peilin Zhou</a>, <a href="/search/cs?searchtype=author&query=Moran%2C+L+V">Lauren V. 
Moran</a>, <a href="/search/cs?searchtype=author&query=Ananiadou%2C+S">Sophia Ananiadou</a>, <a href="/search/cs?searchtype=author&query=Beam%2C+A">Andrew Beam</a>, <a href="/search/cs?searchtype=author&query=Torous%2C+J">John Torous</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.02984v2-abstract-short" style="display: inline;"> The integration of large language models (LLMs) in mental health care is an emerging field. There is a need to systematically review the application outcomes and delineate the advantages and limitations in clinical settings. This review aims to provide a comprehensive overview of the use of LLMs in mental health care, assessing their efficacy, challenges, and potential for future applications. A s… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.02984v2-abstract-full').style.display = 'inline'; document.getElementById('2401.02984v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.02984v2-abstract-full" style="display: none;"> The integration of large language models (LLMs) in mental health care is an emerging field. There is a need to systematically review the application outcomes and delineate the advantages and limitations in clinical settings. This review aims to provide a comprehensive overview of the use of LLMs in mental health care, assessing their efficacy, challenges, and potential for future applications. A systematic search was conducted across multiple databases including PubMed, Web of Science, Google Scholar, arXiv, medRxiv, and PsyArXiv in November 2023. 
All forms of original research, peer-reviewed or not, published or disseminated between October 1, 2019, and December 2, 2023, are included without language restrictions if they used LLMs developed after T5 and directly addressed research questions in mental health care settings. From an initial pool of 313 articles, 34 met the inclusion criteria based on their relevance to LLM application in mental health care and the robustness of reported outcomes. Diverse applications of LLMs in mental health care are identified, including diagnosis, therapy, patient engagement enhancement, etc. Key challenges include data availability and reliability, nuanced handling of mental states, and effective evaluation methods. Despite successes in accuracy and accessibility improvement, gaps in clinical applicability and ethical considerations were evident, pointing to the need for robust data, standardized evaluations, and interdisciplinary collaboration. LLMs hold substantial promise for enhancing mental health care. For their full potential to be realized, emphasis must be placed on developing robust datasets, development and evaluation frameworks, ethical guidelines, and interdisciplinary collaborations to address current limitations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.02984v2-abstract-full').style.display = 'none'; document.getElementById('2401.02984v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.12484">arXiv:2312.12484</a> <span> [<a href="https://arxiv.org/pdf/2312.12484">pdf</a>, <a href="https://arxiv.org/format/2312.12484">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> SkyMask: Attack-agnostic Robust Federated Learning with Fine-grained Learnable Masks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yan%2C+P">Peishen Yan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hao Wang</a>, <a href="/search/cs?searchtype=author&query=Song%2C+T">Tao Song</a>, <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yang Hua</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+R">Ruhui Ma</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+N">Ningxin Hu</a>, <a href="/search/cs?searchtype=author&query=Haghighat%2C+M+R">Mohammad R. Haghighat</a>, <a href="/search/cs?searchtype=author&query=Guan%2C+H">Haibing Guan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.12484v2-abstract-short" style="display: inline;"> Federated Learning (FL) is becoming a popular paradigm for leveraging distributed data and preserving data privacy. However, due to the distributed characteristic, FL systems are vulnerable to Byzantine attacks that compromised clients attack the global model by uploading malicious model updates. 
With the development of layer-level and parameter-level fine-grained attacks, the attacks' stealthines… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.12484v2-abstract-full').style.display = 'inline'; document.getElementById('2312.12484v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.12484v2-abstract-full" style="display: none;"> Federated Learning (FL) is becoming a popular paradigm for leveraging distributed data and preserving data privacy. However, due to the distributed characteristic, FL systems are vulnerable to Byzantine attacks that compromised clients attack the global model by uploading malicious model updates. With the development of layer-level and parameter-level fine-grained attacks, the attacks' stealthiness and effectiveness have been significantly improved. The existing defense mechanisms solely analyze the model-level statistics of individual model updates uploaded by clients to mitigate Byzantine attacks, which are ineffective against fine-grained attacks due to unawareness or overreaction. To address this problem, we propose SkyMask, a new attack-agnostic robust FL system that firstly leverages fine-grained learnable masks to identify malicious model updates at the parameter level. Specifically, the FL server freezes and multiplies the model updates uploaded by clients with the parameter-level masks, and trains the masks over a small clean dataset (i.e., root dataset) to learn the subtle difference between benign and malicious model updates in a high-dimension space. Our extensive experiments involve different models on three public datasets under state-of-the-art (SOTA) attacks, where the results show that SkyMask achieves up to 14% higher testing accuracy compared with SOTA defense strategies under the same attacks and successfully defends against attacks with malicious clients of a high fraction up to 80%. 
Code is available at https://github.com/KoalaYan/SkyMask. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.12484v2-abstract-full').style.display = 'none'; document.getElementById('2312.12484v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ECCV2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.04992">arXiv:2312.04992</a> <span> [<a href="https://arxiv.org/pdf/2312.04992">pdf</a>, <a href="https://arxiv.org/ps/2312.04992">ps</a>, <a href="https://arxiv.org/format/2312.04992">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> PFLlib: Personalized Federated Learning Algorithm Library </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jianqing Zhang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yang Liu</a>, <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yang Hua</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hao Wang</a>, <a href="/search/cs?searchtype=author&query=Song%2C+T">Tao Song</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+Z">Zhengui Xue</a>, <a 
href="/search/cs?searchtype=author&query=Ma%2C+R">Ruhui Ma</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+J">Jian Cao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.04992v1-abstract-short" style="display: inline;"> Amid the ongoing advancements in Federated Learning (FL), a machine learning paradigm that allows collaborative learning with data privacy protection, personalized FL (pFL) has gained significant prominence as a research direction within the FL domain. Whereas traditional FL (tFL) focuses on jointly learning a global model, pFL aims to achieve a balance between the global and personalized objectiv… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.04992v1-abstract-full').style.display = 'inline'; document.getElementById('2312.04992v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.04992v1-abstract-full" style="display: none;"> Amid the ongoing advancements in Federated Learning (FL), a machine learning paradigm that allows collaborative learning with data privacy protection, personalized FL (pFL) has gained significant prominence as a research direction within the FL domain. Whereas traditional FL (tFL) focuses on jointly learning a global model, pFL aims to achieve a balance between the global and personalized objectives of each client in FL settings. To foster the pFL research community, we propose PFLlib, a comprehensive pFL algorithm library with an integrated evaluation platform. In PFLlib, We implement 34 state-of-the-art FL algorithms (including 7 classic tFL algorithms and 27 pFL algorithms) and provide various evaluation environments with three statistically heterogeneous scenarios and 14 datasets. At present, PFLlib has already gained 850 stars and 199 forks on GitHub. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.04992v1-abstract-full').style.display = 'none'; document.getElementById('2312.04992v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.03290">arXiv:2312.03290</a> <span> [<a href="https://arxiv.org/pdf/2312.03290">pdf</a>, <a href="https://arxiv.org/format/2312.03290">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Can language agents be alternatives to PPO? 
A Preliminary Empirical Study On OpenAI Gym </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sheng%2C+J">Junjie Sheng</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Z">Zixiao Huang</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+C">Chuyun Shen</a>, <a href="/search/cs?searchtype=author&query=Li%2C+W">Wenhao Li</a>, <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yun Hua</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+B">Bo Jin</a>, <a href="/search/cs?searchtype=author&query=Zha%2C+H">Hongyuan Zha</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiangfeng Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.03290v1-abstract-short" style="display: inline;"> The formidable capacity for zero- or few-shot decision-making in language agents encourages us to pose a compelling question: Can language agents be alternatives to PPO agents in traditional sequential decision-making tasks? To investigate this, we first take environments collected in OpenAI Gym as our testbeds and ground them to textual environments that construct the TextGym simulator. This allo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.03290v1-abstract-full').style.display = 'inline'; document.getElementById('2312.03290v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.03290v1-abstract-full" style="display: none;"> The formidable capacity for zero- or few-shot decision-making in language agents encourages us to pose a compelling question: Can language agents be alternatives to PPO agents in traditional sequential decision-making tasks? 
To investigate this, we first take environments collected in OpenAI Gym as our testbeds and ground them to textual environments that construct the TextGym simulator. This allows for straightforward and efficient comparisons between PPO agents and language agents, given the widespread adoption of OpenAI Gym. To ensure a fair and effective benchmarking, we introduce $5$ levels of scenario for accurate domain-knowledge controlling and a unified RL-inspired framework for language agents. Additionally, we propose an innovative explore-exploit-guided language (EXE) agent to solve tasks within TextGym. Through numerical experiments and ablation studies, we extract valuable insights into the decision-making capabilities of language agents and make a preliminary evaluation of their potential to be alternatives to PPO in classical sequential decision-making problems. This paper sheds light on the performance of language agents and paves the way for future research in this exciting domain. Our code is publicly available at~\url{https://github.com/mail-ecnu/Text-Gym-Agents}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.03290v1-abstract-full').style.display = 'none'; document.getElementById('2312.03290v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.14975">arXiv:2311.14975</a> <span> [<a href="https://arxiv.org/pdf/2311.14975">pdf</a>, <a href="https://arxiv.org/format/2311.14975">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> Eliminating Domain Bias for Federated Learning in Representation Space </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jianqing Zhang</a>, <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yang Hua</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+J">Jian Cao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hao Wang</a>, <a href="/search/cs?searchtype=author&query=Song%2C+T">Tao Song</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+Z">Zhengui Xue</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+R">Ruhui Ma</a>, <a href="/search/cs?searchtype=author&query=Guan%2C+H">Haibing Guan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.14975v1-abstract-short" style="display: inline;"> Recently, federated learning (FL) is popular for its privacy-preserving and collaborative learning abilities. However, under statistically heterogeneous scenarios, we observe that biased data domains on clients cause a representation bias phenomenon and further degenerate generic representations during local training, i.e., the representation degeneration phenomenon. 
To address these issues, we pr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.14975v1-abstract-full').style.display = 'inline'; document.getElementById('2311.14975v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.14975v1-abstract-full" style="display: none;"> Recently, federated learning (FL) is popular for its privacy-preserving and collaborative learning abilities. However, under statistically heterogeneous scenarios, we observe that biased data domains on clients cause a representation bias phenomenon and further degenerate generic representations during local training, i.e., the representation degeneration phenomenon. To address these issues, we propose a general framework Domain Bias Eliminator (DBE) for FL. Our theoretical analysis reveals that DBE can promote bi-directional knowledge transfer between server and client, as it reduces the domain discrepancy between server and client in representation space. Besides, extensive experiments on four datasets show that DBE can greatly improve existing FL methods in both generalization and personalization abilities. The DBE-equipped FL method can outperform ten state-of-the-art personalized FL methods by a large margin. Our code is public at https://github.com/TsingZ0/DBE. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.14975v1-abstract-full').style.display = 'none'; document.getElementById('2311.14975v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by NeurIPS 2023, 24 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.10777">arXiv:2311.10777</a> <span> [<a href="https://arxiv.org/pdf/2311.10777">pdf</a>, <a href="https://arxiv.org/format/2311.10777">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1007/s10462-024-10906-z">10.1007/s10462-024-10906-z <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> A Systematic Review of Aspect-based Sentiment Analysis: Domains, Methods, and Trends </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hua%2C+Y+C">Yan Cathy Hua</a>, <a href="/search/cs?searchtype=author&query=Denny%2C+P">Paul Denny</a>, <a href="/search/cs?searchtype=author&query=Taskova%2C+K">Katerina Taskova</a>, <a href="/search/cs?searchtype=author&query=Wicker%2C+J">Jörg Wicker</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.10777v6-abstract-short" style="display: inline;"> Aspect-based sentiment analysis (ABSA) is a fine-grained type of sentiment analysis that identifies aspects and their associated opinions from a given text. 
With the surge of digital opinionated text data, ABSA gained increasing popularity for its ability to mine more detailed and targeted insights. Many review papers on ABSA subtasks and solution methodologies exist, however, few focus on trends… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.10777v6-abstract-full').style.display = 'inline'; document.getElementById('2311.10777v6-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.10777v6-abstract-full" style="display: none;"> Aspect-based sentiment analysis (ABSA) is a fine-grained type of sentiment analysis that identifies aspects and their associated opinions from a given text. With the surge of digital opinionated text data, ABSA gained increasing popularity for its ability to mine more detailed and targeted insights. Many review papers on ABSA subtasks and solution methodologies exist, however, few focus on trends over time or systemic issues relating to research application domains, datasets, and solution approaches. To fill the gap, this paper presents a systematic literature review (SLR) of ABSA studies with a focus on trends and high-level relationships among these fundamental components. This review is one of the largest SLRs on ABSA. To our knowledge, it is also the first to systematically examine the interrelations among ABSA research and data distribution across domains, as well as trends in solution paradigms and approaches. Our sample includes 727 primary studies screened from 8550 search results without time constraints via an innovative automatic filtering process. Our quantitative analysis not only identifies trends in nearly two decades of ABSA research development but also unveils a systemic lack of dataset and domain diversity as well as domain mismatch that may hinder the development of future ABSA research. 
We discuss these findings and their implications and propose suggestions for future research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.10777v6-abstract-full').style.display = 'none'; document.getElementById('2311.10777v6-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Artif Intell Rev 57, 296 (2024) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.05112">arXiv:2311.05112</a> <span> [<a href="https://arxiv.org/pdf/2311.05112">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> A Survey of Large Language Models in Medicine: Progress, Application, and Challenge </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhou%2C+H">Hongjian Zhou</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+F">Fenglin Liu</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+B">Boyang Gu</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+X">Xinyu Zou</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+J">Jinfa Huang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+J">Jinge Wu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yiru Li</a>, <a 
href="/search/cs?searchtype=author&query=Chen%2C+S+S">Sam S. Chen</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+P">Peilin Zhou</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Junling Liu</a>, <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yining Hua</a>, <a href="/search/cs?searchtype=author&query=Mao%2C+C">Chengfeng Mao</a>, <a href="/search/cs?searchtype=author&query=You%2C+C">Chenyu You</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+X">Xian Wu</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Yefeng Zheng</a>, <a href="/search/cs?searchtype=author&query=Clifton%2C+L">Lei Clifton</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zheng Li</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+J">Jiebo Luo</a>, <a href="/search/cs?searchtype=author&query=Clifton%2C+D+A">David A. Clifton</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.05112v7-abstract-short" style="display: inline;"> Large language models (LLMs), such as ChatGPT, have received substantial attention due to their capabilities for understanding and generating human language. While there has been a burgeoning trend in research focusing on the employment of LLMs in supporting different medical tasks (e.g., enhancing clinical diagnostics and providing medical education), a review of these efforts, particularly their… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.05112v7-abstract-full').style.display = 'inline'; document.getElementById('2311.05112v7-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.05112v7-abstract-full" style="display: none;"> Large language models (LLMs), such as ChatGPT, have received substantial attention due to their capabilities for understanding and generating human language. 
While there has been a burgeoning trend in research focusing on the employment of LLMs in supporting different medical tasks (e.g., enhancing clinical diagnostics and providing medical education), a review of these efforts, particularly their development, practical applications, and outcomes in medicine, remains scarce. Therefore, this review aims to provide a detailed overview of the development and deployment of LLMs in medicine, including the challenges and opportunities they face. In terms of development, we provide a detailed introduction to the principles of existing medical LLMs, including their basic model structures, number of parameters, and sources and scales of data used for model development. It serves as a guide for practitioners in developing medical LLMs tailored to their specific needs. In terms of deployment, we offer a comparison of the performance of different LLMs across various medical tasks, and further compare them with state-of-the-art lightweight models, aiming to provide an understanding of the advantages and limitations of LLMs in medicine. Overall, in this review, we address the following questions: 1) What are the practices for developing medical LLMs? 2) How to measure the medical task performance of LLMs in a medical setting? 3) How have medical LLMs been employed in real-world practice? 4) What challenges arise from the use of medical LLMs? and 5) How to more effectively develop and deploy medical LLMs? By answering these questions, this review aims to provide insights into the opportunities for LLMs in medicine and serve as a practical resource. 
We also maintain a regularly updated list of practical guides on medical LLMs at https://github.com/AI-in-Health/MedLLMsPracticalGuide <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.05112v7-abstract-full').style.display = 'none'; document.getElementById('2311.05112v7-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint. Version 6. Update Figures 1-5; Tables 2-3; 31 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.04199">arXiv:2311.04199</a> <span> [<a href="https://arxiv.org/pdf/2311.04199">pdf</a>, <a href="https://arxiv.org/format/2311.04199">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Exploring Recommendation Capabilities of GPT-4V(ision): A Preliminary Case Study </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhou%2C+P">Peilin Zhou</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+M">Meng Cao</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">You-Liang Huang</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+Q">Qichen Ye</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+P">Peiyan 
Zhang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Junling Liu</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+Y">Yueqi Xie</a>, <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yining Hua</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+J">Jaeboum Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.04199v1-abstract-short" style="display: inline;"> Large Multimodal Models (LMMs) have demonstrated impressive performance across various vision and language tasks, yet their potential applications in recommendation tasks with visual assistance remain unexplored. To bridge this gap, we present a preliminary case study investigating the recommendation capabilities of GPT-4V(ision), a recently released LMM by OpenAI. We construct a series of qualitat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.04199v1-abstract-full').style.display = 'inline'; document.getElementById('2311.04199v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.04199v1-abstract-full" style="display: none;"> Large Multimodal Models (LMMs) have demonstrated impressive performance across various vision and language tasks, yet their potential applications in recommendation tasks with visual assistance remain unexplored. To bridge this gap, we present a preliminary case study investigating the recommendation capabilities of GPT-4V(ision), a recently released LMM by OpenAI. We construct a series of qualitative test samples spanning multiple domains and employ these samples to assess the quality of GPT-4V's responses within recommendation scenarios. 
Evaluation results on these test samples prove that GPT-4V has remarkable zero-shot recommendation abilities across diverse domains, thanks to its robust visual-text comprehension capabilities and extensive general knowledge. However, we have also identified some limitations in using GPT-4V for recommendations, including a tendency to provide similar responses when given similar inputs. This report concludes with an in-depth discussion of the challenges and research opportunities associated with utilizing GPT-4V in recommendation scenarios. Our objective is to explore the potential of extending LMMs from vision and language tasks to recommendation tasks. We hope to inspire further research into next-generation multimodal generative recommendation models, which can enhance user experiences by offering greater diversity and interactivity. All images and prompts used in this report will be accessible at https://github.com/PALIN2018/Evaluate_GPT-4V_Rec. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.04199v1-abstract-full').style.display = 'none'; document.getElementById('2311.04199v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">In Progress</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.00204">arXiv:2311.00204</a> <span> [<a href="https://arxiv.org/pdf/2311.00204">pdf</a>, <a href="https://arxiv.org/format/2311.00204">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Continuous Training and Fine-tuning for Domain-Specific Language Models in Medical Question Answering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Guo%2C+Z">Zhen Guo</a>, <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yining Hua</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.00204v1-abstract-short" style="display: inline;"> Large language models exhibit promising general capabilities but often lack specialized knowledge for domain-specific tasks. Developing domain experts from a base model enables a range of applications without prohibitive training costs. This work demonstrates a method using continuous training and instruction fine-tuning to rapidly adapt Llama 2 base models to the Chinese medical domain. 
We first… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.00204v1-abstract-full').style.display = 'inline'; document.getElementById('2311.00204v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.00204v1-abstract-full" style="display: none;"> Large language models exhibit promising general capabilities but often lack specialized knowledge for domain-specific tasks. Developing domain experts from a base model enables a range of applications without prohibitive training costs. This work demonstrates a method using continuous training and instruction fine-tuning to rapidly adapt Llama 2 base models to the Chinese medical domain. We first conduct continuous training on 1B tokens from Chinese medical references to teach relevant vocabulary and knowledge. The models are then fine-tuned on 54K examples sourced from the Chinese National Medical Licensing Examination. Experiments on Chinese medical data confirm the effectiveness of this approach, producing a model comparable to GPT-3.5-turbo while using way less computational resource. The resulting domain-specific model could be useful for various Chinese medical applications. More broadly, this provides a template for domain-specific training of large language models in areas where pre-trained models lack the required expertise, such as law, science, and engineering. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.00204v1-abstract-full').style.display = 'none'; document.getElementById('2311.00204v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.17956">arXiv:2310.17956</a> <span> [<a href="https://arxiv.org/pdf/2310.17956">pdf</a>, <a href="https://arxiv.org/format/2310.17956">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Qilin-Med-VL: Towards Chinese Large Vision-Language Model for General Healthcare </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+J">Junling Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Ziming Wang</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+Q">Qichen Ye</a>, <a href="/search/cs?searchtype=author&query=Chong%2C+D">Dading Chong</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+P">Peilin Zhou</a>, <a href="/search/cs?searchtype=author&query=Hua%2C+Y">Yining Hua</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.17956v2-abstract-short" style="display: inline;"> Large Language Models (LLMs) have introduced a new era of proficiency in comprehending complex healthcare and biomedical topics. However, there is a noticeable lack of models in languages other than English and models that can interpret multi-modal input, which is crucial for global healthcare accessibility. 
In response, this study introduces Qilin-Med-VL, the first Chinese large vision-language m… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.17956v2-abstract-full').style.display = 'inline'; document.getElementById('2310.17956v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.17956v2-abstract-full" style="display: none;"> Large Language Models (LLMs) have introduced a new era of proficiency in comprehending complex healthcare and biomedical topics. However, there is a noticeable lack of models in languages other than English and models that can interpret multi-modal input, which is crucial for global healthcare accessibility. In response, this study introduces Qilin-Med-VL, the first Chinese large vision-language model designed to integrate the analysis of textual and visual data. Qilin-Med-VL combines a pre-trained Vision Transformer (ViT) with a foundational LLM. It undergoes a thorough two-stage curriculum training process that includes feature alignment and instruction tuning. This method enhances the model's ability to generate medical captions and answer complex medical queries. We also release ChiMed-VL, a dataset consisting of more than 1M image-text pairs. This dataset has been carefully curated to enable detailed and comprehensive interpretation of medical data using various types of images. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.17956v2-abstract-full').style.display = 'none'; document.getElementById('2310.17956v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. 
</p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Hua%2C+Y&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Hua%2C+Y&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Hua%2C+Y&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Hua%2C+Y&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Hua%2C+Y&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 
5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 
48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>