Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/>  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b">  <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\$","\$"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a>  <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div>  <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div>  <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 295 results for author: <span class="mathjax">Kang, Y</span> </h1> </div> <div class="level-right is-hidden-mobile">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Kang%2C+Y">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Kang, Y"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Kang%2C+Y&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Kang, Y"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Kang%2C+Y&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Kang%2C+Y&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Kang%2C+Y&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Kang%2C+Y&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Kang%2C+Y&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Kang%2C+Y&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li> <a href="/search/?searchtype=author&query=Kang%2C+Y&start=250" class="pagination-link " aria-label="Page 6" aria-current="page">6 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.12441">arXiv:2411.12441</a> <span> [<a href="https://arxiv.org/pdf/2411.12441">pdf</a>, <a href="https://arxiv.org/format/2411.12441">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Towards Unifying Feature Interaction Models for Click-Through Rate Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yu Kang</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+J">Junwei Pan</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+J">Jipeng Jin</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+S">Shudong Huang</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+X">Xiaofeng Gao</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+L">Lei Xiao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.12441v1-abstract-short" style="display: inline;"> Modeling feature interactions plays a crucial role in accurately predicting click-through rates (CTR) in advertising systems. To capture the intricate patterns of interaction, many existing models employ matrix-factorization techniques to represent features as lower-dimensional embedding vectors, enabling the modeling of interactions as products between these embeddings. In this paper, we propose… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12441v1-abstract-full').style.display = 'inline'; document.getElementById('2411.12441v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.12441v1-abstract-full" style="display: none;"> Modeling feature interactions plays a crucial role in accurately predicting click-through rates (CTR) in advertising systems. To capture the intricate patterns of interaction, many existing models employ matrix-factorization techniques to represent features as lower-dimensional embedding vectors, enabling the modeling of interactions as products between these embeddings. In this paper, we propose a general framework called IPA to systematically unify these models. Our framework comprises three key components: the Interaction Function, which facilitates feature interaction; the Layer Pooling, which constructs higher-level interaction layers; and the Layer Aggregator, which combines the outputs of all layers to serve as input for the subsequent classifier. We demonstrate that most existing models can be categorized within our framework by making specific choices for these three components. Through extensive experiments and a dimensional collapse analysis, we evaluate the performance of these choices. Furthermore, by leveraging the most powerful components within our framework, we introduce a novel model that achieves competitive results compared to state-of-the-art CTR models. PFL gets significant GMV lift during online A/B test in Tencent's advertising platform and has been deployed as the production model in several primary scenarios. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12441v1-abstract-full').style.display = 'none'; document.getElementById('2411.12441v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.11707">arXiv:2411.11707</a> <span> [<a href="https://arxiv.org/pdf/2411.11707">pdf</a>, <a href="https://arxiv.org/format/2411.11707">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> FedCoLLM: A Parameter-Efficient Federated Co-tuning Framework for Large and Small Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Fan%2C+T">Tao Fan</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yan Kang</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+G">Guoqiang Ma</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+L">Lixin Fan</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kai Chen</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Q">Qiang Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.11707v1-abstract-short" style="display: inline;"> By adapting Large Language Models (LLMs) to domain-specific tasks or enriching them with domain-specific knowledge, we can fully harness the capabilities of LLMs. Nonetheless, a gap persists in achieving simultaneous mutual enhancement between the server's LLM and the downstream clients' Small Language Models (SLMs). To address this, we propose FedCoLLM, a novel and parameter-efficient federated f… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11707v1-abstract-full').style.display = 'inline'; document.getElementById('2411.11707v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.11707v1-abstract-full" style="display: none;"> By adapting Large Language Models (LLMs) to domain-specific tasks or enriching them with domain-specific knowledge, we can fully harness the capabilities of LLMs. Nonetheless, a gap persists in achieving simultaneous mutual enhancement between the server's LLM and the downstream clients' Small Language Models (SLMs). To address this, we propose FedCoLLM, a novel and parameter-efficient federated framework designed for co-tuning LLMs and SLMs. This approach is aimed at adaptively transferring server-side LLMs knowledge to clients' SLMs while simultaneously enriching the LLMs with domain insights from the clients. To accomplish this, FedCoLLM utilizes lightweight adapters in conjunction with SLMs, facilitating knowledge exchange between server and clients in a manner that respects data privacy while also minimizing computational and communication overhead. Our evaluation of FedCoLLM, utilizing various public LLMs and SLMs across a range of NLP text generation tasks, reveals that the performance of clients' SLMs experiences notable improvements with the assistance of the LLMs. Simultaneously, the LLMs enhanced via FedCoLLM achieves comparable performance to that obtained through direct fine-tuning on clients' data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11707v1-abstract-full').style.display = 'none'; document.getElementById('2411.11707v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.07446">arXiv:2411.07446</a> <span> [<a href="https://arxiv.org/pdf/2411.07446">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Efficient and Accurate Prompt Optimization: the Benefit of Memory in Exemplar-Guided Reflection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yan%2C+C">Cilin Yan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jingyun Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Lin Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+R">Ruihui Zhao</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+X">Xiaopu Wu</a>, <a href="/search/cs?searchtype=author&query=Xiong%2C+K">Kai Xiong</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Q">Qingsong Liu</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+G">Guoliang Kang</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yangyang Kang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.07446v1-abstract-short" style="display: inline;"> Automatic prompt engineering aims to enhance the generation quality of large language models (LLMs). Recent works utilize feedbacks generated from erroneous cases to guide the prompt optimization. During inference, they may further retrieve several semantically-related exemplars and concatenate them to the optimized prompts to improve the performance. However, those works only utilize the feedback… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07446v1-abstract-full').style.display = 'inline'; document.getElementById('2411.07446v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.07446v1-abstract-full" style="display: none;"> Automatic prompt engineering aims to enhance the generation quality of large language models (LLMs). Recent works utilize feedbacks generated from erroneous cases to guide the prompt optimization. During inference, they may further retrieve several semantically-related exemplars and concatenate them to the optimized prompts to improve the performance. However, those works only utilize the feedback at the current step, ignoring historical and unseleccted feedbacks which are potentially beneficial. Moreover, the selection of exemplars only considers the general semantic relationship and may not be optimal in terms of task performance and matching with the optimized prompt. In this work, we propose an Exemplar-Guided Reflection with Memory mechanism (ERM) to realize more efficient and accurate prompt optimization. Specifically, we design an exemplar-guided reflection mechanism where the feedback generation is additionally guided by the generated exemplars. We further build two kinds of memory to fully utilize the historical feedback information and support more effective exemplar retrieval. Empirical evaluations show our method surpasses previous state-of-the-arts with less optimization steps, i.e., improving F1 score by 10.1 on LIAR dataset, and reducing half of the optimization steps on ProTeGi. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07446v1-abstract-full').style.display = 'none'; document.getElementById('2411.07446v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06067">arXiv:2411.06067</a> <span> [<a href="https://arxiv.org/pdf/2411.06067">pdf</a>, <a href="https://arxiv.org/format/2411.06067">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> </div> </div> <p class="title is-5 mathjax"> AI-Driven Stylization of 3D Environments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yuanbo Chen</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yixiao Kang</a>, <a href="/search/cs?searchtype=author&query=Song%2C+Y">Yukun Song</a>, <a href="/search/cs?searchtype=author&query=Vachha%2C+C">Cyrus Vachha</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+S">Sining Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06067v1-abstract-short" style="display: inline;"> In this system, we discuss methods to stylize a scene of 3D primitive objects into a higher fidelity 3D scene using novel 3D representations like NeRFs and 3D Gaussian Splatting. Our approach leverages existing image stylization systems and image-to-3D generative models to create a pipeline that iteratively stylizes and composites 3D objects into scenes. We show our results on adding generated obj… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06067v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06067v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06067v1-abstract-full" style="display: none;"> In this system, we discuss methods to stylize a scene of 3D primitive objects into a higher fidelity 3D scene using novel 3D representations like NeRFs and 3D Gaussian Splatting. Our approach leverages existing image stylization systems and image-to-3D generative models to create a pipeline that iteratively stylizes and composites 3D objects into scenes. We show our results on adding generated objects into a scene and discuss limitations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06067v1-abstract-full').style.display = 'none'; document.getElementById('2411.06067v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.21256">arXiv:2410.21256</a> <span> [<a href="https://arxiv.org/pdf/2410.21256">pdf</a>, <a href="https://arxiv.org/format/2410.21256">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Multi-modal AI for comprehensive breast cancer prognostication </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Witowski%2C+J">Jan Witowski</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+K">Ken Zeng</a>, <a href="/search/cs?searchtype=author&query=Cappadona%2C+J">Joseph Cappadona</a>, <a href="/search/cs?searchtype=author&query=Elayoubi%2C+J">Jailan Elayoubi</a>, <a href="/search/cs?searchtype=author&query=Chiru%2C+E+D">Elena Diana Chiru</a>, <a href="/search/cs?searchtype=author&query=Chan%2C+N">Nancy Chan</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Young-Joon Kang</a>, <a href="/search/cs?searchtype=author&query=Howard%2C+F">Frederick Howard</a>, <a href="/search/cs?searchtype=author&query=Ostrovnaya%2C+I">Irina Ostrovnaya</a>, <a href="/search/cs?searchtype=author&query=Fernandez-Granda%2C+C">Carlos Fernandez-Granda</a>, <a href="/search/cs?searchtype=author&query=Schnabel%2C+F">Freya Schnabel</a>, <a href="/search/cs?searchtype=author&query=Ozerdem%2C+U">Ugur Ozerdem</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+K">Kangning Liu</a>, <a href="/search/cs?searchtype=author&query=Steinsnyder%2C+Z">Zoe Steinsnyder</a>, <a href="/search/cs?searchtype=author&query=Thakore%2C+N">Nitya Thakore</a>, <a href="/search/cs?searchtype=author&query=Sadic%2C+M">Mohammad Sadic</a>, <a href="/search/cs?searchtype=author&query=Yeung%2C+F">Frank Yeung</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+E">Elisa Liu</a>, <a href="/search/cs?searchtype=author&query=Hill%2C+T">Theodore Hill</a>, <a href="/search/cs?searchtype=author&query=Swett%2C+B">Benjamin Swett</a>, <a href="/search/cs?searchtype=author&query=Rigau%2C+D">Danielle Rigau</a>, <a href="/search/cs?searchtype=author&query=Clayburn%2C+A">Andrew Clayburn</a>, <a href="/search/cs?searchtype=author&query=Speirs%2C+V">Valerie Speirs</a>, <a href="/search/cs?searchtype=author&query=Vetter%2C+M">Marcus Vetter</a>, <a href="/search/cs?searchtype=author&query=Sojak%2C+L">Lina Sojak</a> , et al. (26 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.21256v1-abstract-short" style="display: inline;"> Treatment selection in breast cancer is guided by molecular subtypes and clinical characteristics. Recurrence risk assessment plays a crucial role in personalizing treatment. Current methods, including genomic assays, have limited accuracy and clinical utility, leading to suboptimal decisions for many patients. We developed a test for breast cancer patient stratification based on digital pathology… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.21256v1-abstract-full').style.display = 'inline'; document.getElementById('2410.21256v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.21256v1-abstract-full" style="display: none;"> Treatment selection in breast cancer is guided by molecular subtypes and clinical characteristics. Recurrence risk assessment plays a crucial role in personalizing treatment. Current methods, including genomic assays, have limited accuracy and clinical utility, leading to suboptimal decisions for many patients. We developed a test for breast cancer patient stratification based on digital pathology and clinical characteristics using novel AI methods. Specifically, we utilized a vision transformer-based pan-cancer foundation model trained with self-supervised learning to extract features from digitized H&E-stained slides. These features were integrated with clinical data to form a multi-modal AI test predicting cancer recurrence and death. The test was developed and evaluated using data from a total of 8,161 breast cancer patients across 15 cohorts originating from seven countries. Of these, 3,502 patients from five cohorts were used exclusively for evaluation, while the remaining patients were used for training. Our test accurately predicted our primary endpoint, disease-free interval, in the five external cohorts (C-index: 0.71 [0.68-0.75], HR: 3.63 [3.02-4.37, p<0.01]). In a direct comparison (N=858), the AI test was more accurate than Oncotype DX, the standard-of-care 21-gene assay, with a C-index of 0.67 [0.61-0.74] versus 0.61 [0.49-0.73], respectively. Additionally, the AI test added independent information to Oncotype DX in a multivariate analysis (HR: 3.11 [1.91-5.09, p<0.01)]). The test demonstrated robust accuracy across all major breast cancer subtypes, including TNBC (C-index: 0.71 [0.62-0.81], HR: 3.81 [2.35-6.17, p=0.02]), where no diagnostic tools are currently recommended by clinical guidelines. These results suggest that our AI test can improve accuracy, extend applicability to a wider range of patients, and enhance access to treatment selection tools. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.21256v1-abstract-full').style.display = 'none'; document.getElementById('2410.21256v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.17822">arXiv:2410.17822</a> <span> [<a href="https://arxiv.org/pdf/2410.17822">pdf</a>, <a href="https://arxiv.org/format/2410.17822">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> DREB-Net: Dual-stream Restoration Embedding Blur-feature Fusion Network for High-mobility UAV Object Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+Q">Qingpeng Li</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yuxin Zhang</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+L">Leyuan Fang</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yuhan Kang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S">Shutao Li</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+X+X">Xiao Xiang Zhu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.17822v1-abstract-short" style="display: inline;"> Object detection algorithms are pivotal components of unmanned aerial vehicle (UAV) imaging systems, extensively employed in complex fields. However, images captured by high-mobility UAVs often suffer from motion blur cases, which significantly impedes the performance of advanced object detection algorithms. To address these challenges, we propose an innovative object detection algorithm specifica… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.17822v1-abstract-full').style.display = 'inline'; document.getElementById('2410.17822v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.17822v1-abstract-full" style="display: none;"> Object detection algorithms are pivotal components of unmanned aerial vehicle (UAV) imaging systems, extensively employed in complex fields. However, images captured by high-mobility UAVs often suffer from motion blur cases, which significantly impedes the performance of advanced object detection algorithms. To address these challenges, we propose an innovative object detection algorithm specifically designed for blurry images, named DREB-Net (Dual-stream Restoration Embedding Blur-feature Fusion Network). First, DREB-Net addresses the particularities of blurry image object detection problem by incorporating a Blurry image Restoration Auxiliary Branch (BRAB) during the training phase. Second, it fuses the extracted shallow features via Multi-level Attention-Guided Feature Fusion (MAGFF) module, to extract richer features. Here, the MAGFF module comprises local attention modules and global attention modules, which assign different weights to the branches. Then, during the inference phase, the deep feature extraction of the BRAB can be removed to reduce computational complexity and improve detection speed. In loss function, a combined loss of MSE and SSIM is added to the BRAB to restore blurry images. Finally, DREB-Net introduces Fast Fourier Transform in the early stages of feature extraction, via a Learnable Frequency domain Amplitude Modulation Module (LFAMM), to adjust feature amplitude and enhance feature processing capability. Experimental results indicate that DREB-Net can still effectively perform object detection tasks under motion blur in captured images, showcasing excellent performance and broad application prospects. Our source code will be available at https://github.com/EEIC-Lab/DREB-Net.git. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.17822v1-abstract-full').style.display = 'none'; document.getElementById('2410.17822v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.16237">arXiv:2410.16237</a> <span> [<a href="https://arxiv.org/pdf/2410.16237">pdf</a>, <a href="https://arxiv.org/format/2410.16237">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> </div> </div> <p class="title is-5 mathjax"> IBGP: Imperfect Byzantine Generals Problem for Zero-Shot Robustness in Communicative Multi-Agent Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mao%2C+Y">Yihuan Mao</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yipeng Kang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+P">Peilun Li</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+N">Ning Zhang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+W">Wei Xu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+C">Chongjie Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.16237v2-abstract-short" style="display: inline;"> As large language model (LLM) agents increasingly integrate into our infrastructure, their robust coordination and message synchronization become vital. The Byzantine Generals Problem (BGP) is a critical model for constructing resilient multi-agent systems (MAS) under adversarial attacks. It describes a scenario where malicious agents with unknown identities exist in the system-situations that, in… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16237v2-abstract-full').style.display = 'inline'; document.getElementById('2410.16237v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.16237v2-abstract-full" style="display: none;"> As large language model (LLM) agents increasingly integrate into our infrastructure, their robust coordination and message synchronization become vital. The Byzantine Generals Problem (BGP) is a critical model for constructing resilient multi-agent systems (MAS) under adversarial attacks. It describes a scenario where malicious agents with unknown identities exist in the system-situations that, in our context, could result from LLM agents' hallucinations or external attacks. In BGP, the objective of the entire system is to reach a consensus on the action to be taken. Traditional BGP requires global consensus among all agents; however, in practical scenarios, global consensus is not always necessary and can even be inefficient. Therefore, there is a pressing need to explore a refined version of BGP that aligns with the local coordination patterns observed in MAS. We refer to this refined version as Imperfect BGP (IBGP) in our research, aiming to address this discrepancy. To tackle this issue, we propose a framework that leverages consensus protocols within general MAS settings, providing provable resilience against communication attacks and adaptability to changing environments, as validated by empirical results. Additionally, we present a case study in a sensor network environment to illustrate the practical application of our protocol. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16237v2-abstract-full').style.display = 'none'; document.getElementById('2410.16237v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.14961">arXiv:2410.14961</a> <span> [<a href="https://arxiv.org/pdf/2410.14961">pdf</a>, <a href="https://arxiv.org/format/2410.14961">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> </div> </div> <p class="title is-5 mathjax"> LangGFM: A Large Language Model Alone Can be a Powerful Graph Foundation Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lin%2C+T">Tianqianjin Lin</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+P">Pengwei Yan</a>, <a href="/search/cs?searchtype=author&query=Song%2C+K">Kaisong Song</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Z">Zhuoren Jiang</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yangyang Kang</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+J">Jun Lin</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+W">Weikang Yuan</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+J">Junjie Cao</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+C">Changlong Sun</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xiaozhong Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.14961v1-abstract-short" style="display: inline;"> Graph foundation models (GFMs) have recently gained significant attention. However, the unique data processing and evaluation setups employed by different studies hinder a deeper understanding of their progress. Additionally, current research tends to focus on specific subsets of graph learning tasks, such as structural tasks, node-level tasks, or classification tasks. As a result, they often inco… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14961v1-abstract-full').style.display = 'inline'; document.getElementById('2410.14961v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.14961v1-abstract-full" style="display: none;"> Graph foundation models (GFMs) have recently gained significant attention. However, the unique data processing and evaluation setups employed by different studies hinder a deeper understanding of their progress. Additionally, current research tends to focus on specific subsets of graph learning tasks, such as structural tasks, node-level tasks, or classification tasks. As a result, they often incorporate specialized modules tailored to particular task types, losing their applicability to other graph learning tasks and contradicting the original intent of foundation models to be universal. Therefore, to enhance consistency, coverage, and diversity across domains, tasks, and research interests within the graph learning community in the evaluation of GFMs, we propose GFMBench-a systematic and comprehensive benchmark comprising 26 datasets. Moreover, we introduce LangGFM, a novel GFM that relies entirely on large language models. By revisiting and exploring the effective graph textualization principles, as well as repurposing successful techniques from graph augmentation and graph self-supervised learning within the language space, LangGFM achieves performance on par with or exceeding the state of the art across GFMBench, which can offer us new perspectives, experiences, and baselines to drive forward the evolution of GFMs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14961v1-abstract-full').style.display = 'none'; document.getElementById('2410.14961v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">under review</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.09556">arXiv:2410.09556</a> <span> [<a href="https://arxiv.org/pdf/2410.09556">pdf</a>, <a href="https://arxiv.org/format/2410.09556">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> A Speaker Turn-Aware Multi-Task Adversarial Network for Joint User Satisfaction Estimation and Sentiment Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Song%2C+K">Kaisong Song</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yangyang Kang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jiawei Liu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xurui Li</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+C">Changlong Sun</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xiaozhong Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.09556v1-abstract-short" style="display: inline;"> User Satisfaction Estimation is an important task and increasingly being applied in goal-oriented dialogue systems to estimate whether the user is satisfied with the service. It is observed that whether the user's needs are met often triggers various sentiments, which can be pertinent to the successful estimation of user satisfaction, and vice versa. Thus, User Satisfaction Estimation (USE) and Se… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09556v1-abstract-full').style.display = 'inline'; document.getElementById('2410.09556v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.09556v1-abstract-full" style="display: none;"> User Satisfaction Estimation is an important task and increasingly being applied in goal-oriented dialogue systems to estimate whether the user is satisfied with the service. It is observed that whether the user's needs are met often triggers various sentiments, which can be pertinent to the successful estimation of user satisfaction, and vice versa. Thus, User Satisfaction Estimation (USE) and Sentiment Analysis (SA) should be treated as a joint, collaborative effort, considering the strong connections between the sentiment states of speakers and the user satisfaction. Existing joint learning frameworks mainly unify the two highly pertinent tasks over cascade or shared-bottom implementations, however they fail to distinguish task-specific and common features, which will produce sub-optimal utterance representations for downstream tasks. In this paper, we propose a novel Speaker Turn-Aware Multi-Task Adversarial Network (STMAN) for dialogue-level USE and utterance-level SA. Specifically, we first introduce a multi-task adversarial strategy which trains a task discriminator to make utterance representation more task-specific, and then utilize a speaker-turn aware multi-task interaction strategy to extract the common features which are complementary to each task. Extensive experiments conducted on two real-world service dialogue datasets show that our model outperforms several state-of-the-art methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.09556v1-abstract-full').style.display = 'none'; document.getElementById('2410.09556v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.06842">arXiv:2410.06842</a> <span> [<a href="https://arxiv.org/pdf/2410.06842">pdf</a>, <a href="https://arxiv.org/format/2410.06842">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> SurANet: Surrounding-Aware Network for Concealed Object Detection via Highly-Efficient Interactive Contrastive Learning Strategy </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yuhan Kang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Q">Qingpeng Li</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+L">Leyuan Fang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+J">Jian Zhao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xuelong Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.06842v1-abstract-short" style="display: inline;"> Concealed object detection (COD) in cluttered scenes is significant for various image processing applications. However, due to that concealed objects are always similar to their background, it is extremely hard to distinguish them. Here, the major obstacle is the tiny feature differences between the inside and outside object boundary region, which makes it trouble for existing COD methods to achie… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.06842v1-abstract-full').style.display = 'inline'; document.getElementById('2410.06842v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.06842v1-abstract-full" style="display: none;"> Concealed object detection (COD) in cluttered scenes is significant for various image processing applications. However, due to that concealed objects are always similar to their background, it is extremely hard to distinguish them. Here, the major obstacle is the tiny feature differences between the inside and outside object boundary region, which makes it trouble for existing COD methods to achieve accurate results. In this paper, considering that the surrounding environment information can be well utilized to identify the concealed objects, and thus, we propose a novel deep Surrounding-Aware Network, namely SurANet, for COD tasks, which introduces surrounding information into feature extraction and loss function to improve the discrimination. First, we enhance the semantics of feature maps using differential fusion of surrounding features to highlight concealed objects. Next, a Surrounding-Aware Contrastive Loss is applied to identify the concealed object via learning surrounding feature maps contrastively. Then, SurANet can be trained end-to-end with high efficiency via our proposed Spatial-Compressed Correlation Transmission strategy after our investigation of feature dynamics, and extensive experiments improve that such features can be well reserved respectively. Finally, experimental results demonstrate that the proposed SurANet outperforms state-of-the-art COD methods on multiple real datasets. Our source code will be available at https://github.com/kyh433/SurANet. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.06842v1-abstract-full').style.display = 'none'; document.getElementById('2410.06842v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.02768">arXiv:2410.02768</a> <span> [<a href="https://arxiv.org/pdf/2410.02768">pdf</a>, <a href="https://arxiv.org/format/2410.02768">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> BoViLA: Bootstrapping Video-Language Alignment via LLM-Based Self-Questioning and Answering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jin Chen</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+K">Kaijing Ma</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+H">Haojian Huang</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+J">Jiayu Shen</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+H">Han Fang</a>, <a href="/search/cs?searchtype=author&query=Zang%2C+X">Xianghao Zang</a>, <a href="/search/cs?searchtype=author&query=Ban%2C+C">Chao Ban</a>, <a href="/search/cs?searchtype=author&query=He%2C+Z">Zhongjiang He</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+H">Hao Sun</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yanmei Kang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.02768v1-abstract-short" style="display: inline;"> The development of multi-modal models has been rapidly advancing, with some demonstrating remarkable capabilities. However, annotating video-text pairs remains expensive and insufficient. Take video question answering (VideoQA) tasks as an example, human annotated questions and answers often cover only part of the video, and similar semantics can also be expressed through different text forms, lea… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02768v1-abstract-full').style.display = 'inline'; document.getElementById('2410.02768v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.02768v1-abstract-full" style="display: none;"> The development of multi-modal models has been rapidly advancing, with some demonstrating remarkable capabilities. However, annotating video-text pairs remains expensive and insufficient. Take video question answering (VideoQA) tasks as an example, human annotated questions and answers often cover only part of the video, and similar semantics can also be expressed through different text forms, leading to underutilization of video. To address this, we propose BoViLA, a self-training framework that augments question samples during training through LLM-based self-questioning and answering, which help model exploit video information and the internal knowledge of LLMs more thoroughly to improve modality alignment. To filter bad self-generated questions, we introduce Evidential Deep Learning (EDL) to estimate uncertainty and assess the quality of self-generated questions by evaluating the modality alignment within the context. To the best of our knowledge, this work is the first to explore LLM-based self-training frameworks for modality alignment. We evaluate BoViLA on five strong VideoQA benchmarks, where it outperforms several state-of-the-art methods and demonstrate its effectiveness and generality. Additionally, we provide extensive analyses of the self-training framework and the EDL-based uncertainty filtering mechanism. The code will be made available at https://github.com/dunknsabsw/BoViLA. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02768v1-abstract-full').style.display = 'none'; document.getElementById('2410.02768v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.02507">arXiv:2410.02507</a> <span> [<a href="https://arxiv.org/pdf/2410.02507">pdf</a>, <a href="https://arxiv.org/format/2410.02507">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Can Large Language Models Grasp Legal Theories? Enhance Legal Reasoning with Insights from Multi-Agent Collaboration </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yuan%2C+W">Weikang Yuan</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+J">Junjie Cao</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Z">Zhuoren Jiang</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yangyang Kang</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+J">Jun Lin</a>, <a href="/search/cs?searchtype=author&query=Song%2C+K">Kaisong Song</a>, <a href="/search/cs?searchtype=author&query=lin%2C+t">tianqianjin lin</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+P">Pengwei Yan</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+C">Changlong Sun</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xiaozhong Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.02507v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) could struggle to fully understand legal theories and perform complex legal reasoning tasks. In this study, we introduce a challenging task (confusing charge prediction) to better evaluate LLMs' understanding of legal theories and reasoning capabilities. We also propose a novel framework: Multi-Agent framework for improving complex Legal Reasoning capability (MALR). MA… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02507v1-abstract-full').style.display = 'inline'; document.getElementById('2410.02507v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.02507v1-abstract-full" style="display: none;"> Large Language Models (LLMs) could struggle to fully understand legal theories and perform complex legal reasoning tasks. In this study, we introduce a challenging task (confusing charge prediction) to better evaluate LLMs' understanding of legal theories and reasoning capabilities. We also propose a novel framework: Multi-Agent framework for improving complex Legal Reasoning capability (MALR). MALR employs non-parametric learning, encouraging LLMs to automatically decompose complex legal tasks and mimic human learning process to extract insights from legal rules, helping LLMs better understand legal theories and enhance their legal reasoning abilities. Extensive experiments on multiple real-world datasets demonstrate that the proposed framework effectively addresses complex reasoning issues in practical scenarios, paving the way for more reliable applications in the legal domain. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02507v1-abstract-full').style.display = 'none'; document.getElementById('2410.02507v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2.7 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.01188">arXiv:2410.01188</a> <span> [<a href="https://arxiv.org/pdf/2410.01188">pdf</a>, <a href="https://arxiv.org/format/2410.01188">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Gold Panning in Vocabulary: An Adaptive Method for Vocabulary Expansion of Domain-Specific LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+C">Chengyuan Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shihang Wang</a>, <a href="/search/cs?searchtype=author&query=Qing%2C+L">Lizhi Qing</a>, <a href="/search/cs?searchtype=author&query=Kuang%2C+K">Kun Kuang</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yangyang Kang</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+C">Changlong Sun</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+F">Fei Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.01188v1-abstract-short" style="display: inline;"> While Large Language Models (LLMs) demonstrate impressive generation abilities, they frequently struggle when it comes to specialized domains due to their limited domain-specific knowledge. Studies on domain-specific LLMs resort to expanding the vocabulary before fine-tuning on domain-specific corpus, aiming to decrease the sequence length and enhance efficiency during decoding, without thoroughly… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.01188v1-abstract-full').style.display = 'inline'; document.getElementById('2410.01188v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.01188v1-abstract-full" style="display: none;"> While Large Language Models (LLMs) demonstrate impressive generation abilities, they frequently struggle when it comes to specialized domains due to their limited domain-specific knowledge. Studies on domain-specific LLMs resort to expanding the vocabulary before fine-tuning on domain-specific corpus, aiming to decrease the sequence length and enhance efficiency during decoding, without thoroughly investigating the results of vocabulary expansion to LLMs over different domains. Our pilot study reveals that expansion with only a subset of the entire vocabulary may lead to superior performance. Guided by the discovery, this paper explores how to identify a vocabulary subset to achieve the optimal results. We introduce VEGAD, an adaptive method that automatically identifies valuable words from a given domain vocabulary. Our method has been validated through experiments on three Chinese datasets, demonstrating its effectiveness. Additionally, we have undertaken comprehensive analyses of the method. The selection of a optimal subset for expansion has shown to enhance performance on both domain-specific tasks and general tasks, showcasing the potential of VEGAD. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.01188v1-abstract-full').style.display = 'none'; document.getElementById('2410.01188v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by EMNLP 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.00367">arXiv:2410.00367</a> <span> [<a href="https://arxiv.org/pdf/2410.00367">pdf</a>, <a href="https://arxiv.org/format/2410.00367">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> ROK Defense M&S in the Age of Hyperscale AI: Concepts, Challenges, and Future Directions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lee%2C+Y">Youngjoon Lee</a>, <a href="/search/cs?searchtype=author&query=Park%2C+T">Taehyun Park</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yeongjoon Kang</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+J">Jonghoe Kim</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+J">Joonhyuk Kang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.00367v1-abstract-short" style="display: inline;"> Integrating hyperscale AI into national defense modeling and simulation (M&S) is crucial for enhancing strategic and operational capabilities. We explore how hyperscale AI can revolutionize defense M\&S by providing unprecedented accuracy, speed, and the ability to simulate complex scenarios. Countries such as the United States and China are at the forefront of adopting these technologies and are… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.00367v1-abstract-full').style.display = 'inline'; document.getElementById('2410.00367v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.00367v1-abstract-full" style="display: none;"> Integrating hyperscale AI into national defense modeling and simulation (M&S) is crucial for enhancing strategic and operational capabilities. We explore how hyperscale AI can revolutionize defense M\&S by providing unprecedented accuracy, speed, and the ability to simulate complex scenarios. Countries such as the United States and China are at the forefront of adopting these technologies and are experiencing varying degrees of success. Maximizing the potential of hyperscale AI necessitates addressing critical challenges, such as closed networks, long-tail data, complex decision-making, and a shortage of experts. Future directions emphasize the adoption of domestic foundation models, the investment in various GPUs / NPUs, the utilization of big tech services, and the use of open source software. These initiatives will enhance national security, maintain competitive advantages, and promote broader technological and economic progress. With this blueprint, the Republic of Korea can strengthen its defense capabilities and stay ahead of the emerging threats of modern warfare. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.00367v1-abstract-full').style.display = 'none'; document.getElementById('2410.00367v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.20146">arXiv:2409.20146</a> <span> [<a href="https://arxiv.org/pdf/2409.20146">pdf</a>, <a href="https://arxiv.org/format/2409.20146">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> VMAD: Visual-enhanced Multimodal Large Language Model for Zero-Shot Anomaly Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Deng%2C+H">Huilin Deng</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hongchen Luo</a>, <a href="/search/cs?searchtype=author&query=Zhai%2C+W">Wei Zhai</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+Y">Yang Cao</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yu Kang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.20146v1-abstract-short" style="display: inline;"> Zero-shot anomaly detection (ZSAD) recognizes and localizes anomalies in previously unseen objects by establishing feature mapping between textual prompts and inspection images, demonstrating excellent research value in flexible industrial manufacturing. However, existing ZSAD methods are limited by closed-world settings, struggling to unseen defects with predefined prompts. Recently, adapting Mul… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.20146v1-abstract-full').style.display = 'inline'; document.getElementById('2409.20146v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.20146v1-abstract-full" style="display: none;"> Zero-shot anomaly detection (ZSAD) recognizes and localizes anomalies in previously unseen objects by establishing feature mapping between textual prompts and inspection images, demonstrating excellent research value in flexible industrial manufacturing. However, existing ZSAD methods are limited by closed-world settings, struggling to unseen defects with predefined prompts. Recently, adapting Multimodal Large Language Models (MLLMs) for Industrial Anomaly Detection (IAD) presents a viable solution. Unlike fixed-prompt methods, MLLMs exhibit a generative paradigm with open-ended text interpretation, enabling more adaptive anomaly analysis. However, this adaption faces inherent challenges as anomalies often manifest in fine-grained regions and exhibit minimal visual discrepancies from normal samples. To address these challenges, we propose a novel framework VMAD (Visual-enhanced MLLM Anomaly Detection) that enhances MLLM with visual-based IAD knowledge and fine-grained perception, simultaneously providing precise detection and comprehensive analysis of anomalies. Specifically, we design a Defect-Sensitive Structure Learning scheme that transfers patch-similarities cues from visual branch to our MLLM for improved anomaly discrimination. Besides, we introduce a novel visual projector, Locality-enhanced Token Compression, which mines multi-level features in local contexts to enhance fine-grained detection. Furthermore, we introduce the Real Industrial Anomaly Detection (RIAD), a comprehensive IAD dataset with detailed anomaly descriptions and analyses, offering a valuable resource for MLLM-based IAD development. Extensive experiments on zero-shot benchmarks, including MVTec-AD, Visa, WFDD, and RIAD datasets, demonstrate our superior performance over state-of-the-art methods. The code and dataset will be available soon. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.20146v1-abstract-full').style.display = 'none'; document.getElementById('2409.20146v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.15557">arXiv:2409.15557</a> <span> [<a href="https://arxiv.org/pdf/2409.15557">pdf</a>, <a href="https://arxiv.org/format/2409.15557">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Mixture of Efficient Diffusion Experts Through Automatic Interval and Sub-Network Selection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ganjdanesh%2C+A">Alireza Ganjdanesh</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yan Kang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yuchen Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Richard Zhang</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+Z">Zhe Lin</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+H">Heng Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.15557v1-abstract-short" style="display: inline;"> Diffusion probabilistic models can generate high-quality samples. Yet, their sampling process requires numerous denoising steps, making it slow and computationally intensive. We propose to reduce the sampling cost by pruning a pretrained diffusion model into a mixture of efficient experts. First, we study the similarities between pairs of denoising timesteps, observing a natural clustering, even a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.15557v1-abstract-full').style.display = 'inline'; document.getElementById('2409.15557v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.15557v1-abstract-full" style="display: none;"> Diffusion probabilistic models can generate high-quality samples. Yet, their sampling process requires numerous denoising steps, making it slow and computationally intensive. We propose to reduce the sampling cost by pruning a pretrained diffusion model into a mixture of efficient experts. First, we study the similarities between pairs of denoising timesteps, observing a natural clustering, even across different datasets. This suggests that rather than having a single model for all time steps, separate models can serve as ``experts'' for their respective time intervals. As such, we separately fine-tune the pretrained model on each interval, with elastic dimensions in depth and width, to obtain experts specialized in their corresponding denoising interval. To optimize the resource usage between experts, we introduce our Expert Routing Agent, which learns to select a set of proper network configurations. By doing so, our method can allocate the computing budget between the experts in an end-to-end manner without requiring manual heuristics. Finally, with a selected configuration, we fine-tune our pruned experts to obtain our mixture of efficient experts. We demonstrate the effectiveness of our method, DiffPruning, across several datasets, LSUN-Church, LSUN-Beds, FFHQ, and ImageNet, on the Latent Diffusion Model architecture. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.15557v1-abstract-full').style.display = 'none'; document.getElementById('2409.15557v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to the 18th European Conference on Computer Vision, ECCV 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.14577">arXiv:2409.14577</a> <span> [<a href="https://arxiv.org/pdf/2409.14577">pdf</a>, <a href="https://arxiv.org/format/2409.14577">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> AR Overlay: Training Image Pose Estimation on Curved Surface in a Synthetic Way </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+S">Sining Huang</a>, <a href="/search/cs?searchtype=author&query=Song%2C+Y">Yukun Song</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yixiao Kang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+C">Chang Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.14577v1-abstract-short" style="display: inline;"> In the field of spatial computing, one of the most essential tasks is the pose estimation of 3D objects. While rigid transformations of arbitrary 3D objects are relatively hard to detect due to varying environment introducing factors like insufficient lighting or even occlusion, objects with pre-defined shapes are often easy to track, leveraging geometric constraints. Curved images, with flexible… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.14577v1-abstract-full').style.display = 'inline'; document.getElementById('2409.14577v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.14577v1-abstract-full" style="display: none;"> In the field of spatial computing, one of the most essential tasks is the pose estimation of 3D objects. While rigid transformations of arbitrary 3D objects are relatively hard to detect due to varying environment introducing factors like insufficient lighting or even occlusion, objects with pre-defined shapes are often easy to track, leveraging geometric constraints. Curved images, with flexible dimensions but a confined shape, are essential shapes often targeted in 3D tracking. Traditionally, proprietary algorithms often require specific curvature measures as the input along with the original flattened images to enable pose estimation for a single image target. In this paper, we propose a pipeline that can detect several logo images simultaneously and only requires the original images as the input, unlocking more effects in downstream fields such as Augmented Reality (AR). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.14577v1-abstract-full').style.display = 'none'; document.getElementById('2409.14577v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12th International Conference on Signal, Image Processing and Pattern Recognition (SIPP 2024)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.11174">arXiv:2409.11174</a> <span> [<a href="https://arxiv.org/pdf/2409.11174">pdf</a>, <a href="https://arxiv.org/format/2409.11174">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Neurons and Cognition">q-bio.NC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Identifying Influential nodes in Brain Networks via Self-Supervised Graph-Transformer </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yanqing Kang</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+D">Di Zhu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Haiyang Zhang</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+E">Enze Shi</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+S">Sigang Yu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+J">Jinru Wu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xuhui Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xuan Liu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+G">Geng Chen</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+X">Xi Jiang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+T">Tuo Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Shu Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.11174v1-abstract-short" style="display: inline;"> Studying influential nodes (I-nodes) in brain networks is of great significance in the field of brain imaging. Most existing studies consider brain connectivity hubs as I-nodes. However, this approach relies heavily on prior knowledge from graph theory, which may overlook the intrinsic characteristics of the brain network, especially when its architecture is not fully understood. In contrast, self… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.11174v1-abstract-full').style.display = 'inline'; document.getElementById('2409.11174v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.11174v1-abstract-full" style="display: none;"> Studying influential nodes (I-nodes) in brain networks is of great significance in the field of brain imaging. Most existing studies consider brain connectivity hubs as I-nodes. However, this approach relies heavily on prior knowledge from graph theory, which may overlook the intrinsic characteristics of the brain network, especially when its architecture is not fully understood. In contrast, self-supervised deep learning can learn meaningful representations directly from the data. This approach enables the exploration of I-nodes for brain networks, which is also lacking in current studies. This paper proposes a Self-Supervised Graph Reconstruction framework based on Graph-Transformer (SSGR-GT) to identify I-nodes, which has three main characteristics. First, as a self-supervised model, SSGR-GT extracts the importance of brain nodes to the reconstruction. Second, SSGR-GT uses Graph-Transformer, which is well-suited for extracting features from brain graphs, combining both local and global characteristics. Third, multimodal analysis of I-nodes uses graph-based fusion technology, combining functional and structural brain information. The I-nodes we obtained are distributed in critical areas such as the superior frontal lobe, lateral parietal lobe, and lateral occipital lobe, with a total of 56 identified across different experiments. These I-nodes are involved in more brain networks than other regions, have longer fiber connections, and occupy more central positions in structural connectivity. They also exhibit strong connectivity and high node efficiency in both functional and structural networks. Furthermore, there is a significant overlap between the I-nodes and both the structural and functional rich-club. These findings enhance our understanding of the I-nodes within the brain network, and provide new insights for future research in further understanding the brain working mechanisms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.11174v1-abstract-full').style.display = 'none'; document.getElementById('2409.11174v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.11170">arXiv:2409.11170</a> <span> [<a href="https://arxiv.org/pdf/2409.11170">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> </div> </div> <p class="title is-5 mathjax"> Capturing Differences in Character Representations Between Communities: An Initial Study with Fandom </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kang%2C+B+N+Y">Bianca N. Y. Kang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.11170v1-abstract-short" style="display: inline;"> Sociolinguistic theories have highlighted how narratives are often retold, co-constructed and reconceptualized in collaborative settings. This working paper focuses on the re-interpretation of characters, an integral part of the narrative story-world, and attempts to study how this may be computationally compared between online communities. Using online fandom - a highly communal phenomenon that h… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.11170v1-abstract-full').style.display = 'inline'; document.getElementById('2409.11170v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.11170v1-abstract-full" style="display: none;"> Sociolinguistic theories have highlighted how narratives are often retold, co-constructed and reconceptualized in collaborative settings. This working paper focuses on the re-interpretation of characters, an integral part of the narrative story-world, and attempts to study how this may be computationally compared between online communities. Using online fandom - a highly communal phenomenon that has been largely studied qualitatively - as data, computational methods were applied to explore shifts in character representations between two communities and the original text. Specifically, text from the Harry Potter novels, r/HarryPotter subreddit, and fanfiction on Archive of Our Own were analyzed for changes in character mentions, centrality measures from co-occurrence networks, and semantic associations. While fandom elevates secondary characters as found in past work, the two fan communities prioritize different subsets of characters. Word embedding tests reveal starkly different associations of the same characters between communities on the gendered concepts of femininity/masculinity, cruelty, and beauty. Furthermore, fanfiction descriptions of a male character analyzed between romance pairings scored higher for feminine-coded characteristics in male-male romance, matching past qualitative theorizing. The results high-light the potential for computational methods to assist in capturing the re-conceptualization of narrative elements across communities and in supporting qualitative research on fandom. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.11170v1-abstract-full').style.display = 'none'; document.getElementById('2409.11170v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted and presented as a working paper in SBP-BRiMS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.05275">arXiv:2409.05275</a> <span> [<a href="https://arxiv.org/pdf/2409.05275">pdf</a>, <a href="https://arxiv.org/format/2409.05275">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> RexUniNLU: Recursive Method with Explicit Schema Instructor for Universal NLU </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+C">Chengyuan Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shihang Wang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+F">Fubang Zhao</a>, <a href="/search/cs?searchtype=author&query=Kuang%2C+K">Kun Kuang</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yangyang Kang</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+W">Weiming Lu</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+C">Changlong Sun</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+F">Fei Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.05275v1-abstract-short" style="display: inline;"> Information Extraction (IE) and Text Classification (CLS) serve as the fundamental pillars of NLU, with both disciplines relying on analyzing input sequences to categorize outputs into pre-established schemas. However, there is no existing encoder-based model that can unify IE and CLS tasks from this perspective. To fully explore the foundation shared within NLU tasks, we have proposed a Recursive… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.05275v1-abstract-full').style.display = 'inline'; document.getElementById('2409.05275v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.05275v1-abstract-full" style="display: none;"> Information Extraction (IE) and Text Classification (CLS) serve as the fundamental pillars of NLU, with both disciplines relying on analyzing input sequences to categorize outputs into pre-established schemas. However, there is no existing encoder-based model that can unify IE and CLS tasks from this perspective. To fully explore the foundation shared within NLU tasks, we have proposed a Recursive Method with Explicit Schema Instructor for Universal NLU. Specifically, we firstly redefine the true universal information extraction (UIE) with a formal formulation that covers almost all extraction schemas, including quadruples and quintuples which remain unsolved for previous UIE models. Then, we expands the formulation to all CLS and multi-modal NLU tasks. Based on that, we introduce RexUniNLU, an universal NLU solution that employs explicit schema constraints for IE and CLS, which encompasses all IE and CLS tasks and prevent incorrect connections between schema and input sequence. To avoid interference between different schemas, we reset the position ids and attention mask matrices. Extensive experiments are conducted on IE, CLS in both English and Chinese, and multi-modality, revealing the effectiveness and superiority. Our codes are publicly released. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.05275v1-abstract-full').style.display = 'none'; document.getElementById('2409.05275v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: substantial text overlap with arXiv:2304.14770</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.02530">arXiv:2409.02530</a> <span> [<a href="https://arxiv.org/pdf/2409.02530">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Understanding eGFR Trajectories and Kidney Function Decline via Large Multimodal Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+C">Chih-Yuan Li</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+J">Jun-Ting Wu</a>, <a href="/search/cs?searchtype=author&query=Hsu%2C+C">Chan Hsu</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+M">Ming-Yen Lin</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yihuang Kang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.02530v1-abstract-short" style="display: inline;"> The estimated Glomerular Filtration Rate (eGFR) is an essential indicator of kidney function in clinical practice. Although traditional equations and Machine Learning (ML) models using clinical and laboratory data can estimate eGFR, accurately predicting future eGFR levels remains a significant challenge for nephrologists and ML researchers. Recent advances demonstrate that Large Language Models (… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.02530v1-abstract-full').style.display = 'inline'; document.getElementById('2409.02530v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.02530v1-abstract-full" style="display: none;"> The estimated Glomerular Filtration Rate (eGFR) is an essential indicator of kidney function in clinical practice. Although traditional equations and Machine Learning (ML) models using clinical and laboratory data can estimate eGFR, accurately predicting future eGFR levels remains a significant challenge for nephrologists and ML researchers. Recent advances demonstrate that Large Language Models (LLMs) and Large Multimodal Models (LMMs) can serve as robust foundation models for diverse applications. This study investigates the potential of LMMs to predict future eGFR levels with a dataset consisting of laboratory and clinical values from 50 patients. By integrating various prompting techniques and ensembles of LMMs, our findings suggest that these models, when combined with precise prompts and visual representations of eGFR trajectories, offer predictive performance comparable to existing ML models. This research extends the application of foundation models and suggests avenues for future studies to harness these models in addressing complex medical forecasting challenges. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.02530v1-abstract-full').style.display = 'none'; document.getElementById('2409.02530v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This preprint version includes corrections of typographical errors related to numerical values in Table 2, which were present in the version published at the BDH workshop in MIPR 2024. These corrections do not affect the overall conclusions of the study</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.16633">arXiv:2408.16633</a> <span> [<a href="https://arxiv.org/pdf/2408.16633">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Optimizing Automated Picking Systems in Warehouse Robots Using Machine Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+K">Keqin Li</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jin Wang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+X">Xubo Wu</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+X">Xirui Peng</a>, <a href="/search/cs?searchtype=author&query=Chang%2C+R">Runmian Chang</a>, <a href="/search/cs?searchtype=author&query=Deng%2C+X">Xiaoyu Deng</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yiwen Kang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yue Yang</a>, <a href="/search/cs?searchtype=author&query=Ni%2C+F">Fanghao Ni</a>, <a href="/search/cs?searchtype=author&query=Hong%2C+B">Bo Hong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.16633v1-abstract-short" style="display: inline;"> With the rapid growth of global e-commerce, the demand for automation in the logistics industry is increasing. This study focuses on automated picking systems in warehouses, utilizing deep learning and reinforcement learning technologies to enhance picking efficiency and accuracy while reducing system failure rates. Through empirical analysis, we demonstrate the effectiveness of these technologies… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.16633v1-abstract-full').style.display = 'inline'; document.getElementById('2408.16633v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.16633v1-abstract-full" style="display: none;"> With the rapid growth of global e-commerce, the demand for automation in the logistics industry is increasing. This study focuses on automated picking systems in warehouses, utilizing deep learning and reinforcement learning technologies to enhance picking efficiency and accuracy while reducing system failure rates. Through empirical analysis, we demonstrate the effectiveness of these technologies in improving robot picking performance and adaptability to complex environments. The results show that the integrated machine learning model significantly outperforms traditional methods, effectively addressing the challenges of peak order processing, reducing operational errors, and improving overall logistics efficiency. Additionally, by analyzing environmental factors, this study further optimizes system design to ensure efficient and stable operation under variable conditions. This research not only provides innovative solutions for logistics automation but also offers a theoretical and empirical foundation for future technological development and application. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.16633v1-abstract-full').style.display = 'none'; document.getElementById('2408.16633v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.15057">arXiv:2408.15057</a> <span> [<a href="https://arxiv.org/pdf/2408.15057">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Subgroup Analysis via Model-based Rule Forest </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cheng%2C+I">I-Ling Cheng</a>, <a href="/search/cs?searchtype=author&query=Hsu%2C+C">Chan Hsu</a>, <a href="/search/cs?searchtype=author&query=Ku%2C+C">Chantung Ku</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+P">Pei-Ju Lee</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yihuang Kang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.15057v1-abstract-short" style="display: inline;"> Machine learning models are often criticized for their black-box nature, raising concerns about their applicability in critical decision-making scenarios. Consequently, there is a growing demand for interpretable models in such contexts. In this study, we introduce Model-based Deep Rule Forests (mobDRF), an interpretable representation learning algorithm designed to extract transparent models from… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.15057v1-abstract-full').style.display = 'inline'; document.getElementById('2408.15057v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.15057v1-abstract-full" style="display: none;"> Machine learning models are often criticized for their black-box nature, raising concerns about their applicability in critical decision-making scenarios. Consequently, there is a growing demand for interpretable models in such contexts. In this study, we introduce Model-based Deep Rule Forests (mobDRF), an interpretable representation learning algorithm designed to extract transparent models from data. By leveraging IF-THEN rules with multi-level logic expressions, mobDRF enhances the interpretability of existing models without compromising accuracy. We apply mobDRF to identify key risk factors for cognitive decline in an elderly population, demonstrating its effectiveness in subgroup analysis and local model optimization. Our method offers a promising solution for developing trustworthy and interpretable machine learning models, particularly valuable in fields like healthcare, where understanding differential effects across patient subgroups can lead to more personalized and effective treatments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.15057v1-abstract-full').style.display = 'none'; document.getElementById('2408.15057v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.15055">arXiv:2408.15055</a> <span> [<a href="https://arxiv.org/pdf/2408.15055">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Causal Rule Forest: Toward Interpretable and Precise Treatment Effect Estimation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hsu%2C+C">Chan Hsu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+J">Jun-Ting Wu</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yihuang Kang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.15055v1-abstract-short" style="display: inline;"> Understanding and inferencing Heterogeneous Treatment Effects (HTE) and Conditional Average Treatment Effects (CATE) are vital for developing personalized treatment recommendations. Many state-of-the-art approaches achieve inspiring performance in estimating HTE on benchmark datasets or simulation studies. However, the indirect predicting manner and complex model architecture reduce the interpreta… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.15055v1-abstract-full').style.display = 'inline'; document.getElementById('2408.15055v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.15055v1-abstract-full" style="display: none;"> Understanding and inferencing Heterogeneous Treatment Effects (HTE) and Conditional Average Treatment Effects (CATE) are vital for developing personalized treatment recommendations. Many state-of-the-art approaches achieve inspiring performance in estimating HTE on benchmark datasets or simulation studies. However, the indirect predicting manner and complex model architecture reduce the interpretability of these approaches. To mitigate the gap between predictive performance and heterogeneity interpretability, we introduce the Causal Rule Forest (CRF), a novel approach to learning hidden patterns from data and transforming the patterns into interpretable multi-level Boolean rules. By training the other interpretable causal inference models with data representation learned by CRF, we can reduce the predictive errors of these models in estimating HTE and CATE, while keeping their interpretability for identifying subgroups that a treatment is more effective. Our experiments underscore the potential of CRF to advance personalized interventions and policies, paving the way for future research to enhance its scalability and application across complex causal inference challenges. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.15055v1-abstract-full').style.display = 'none'; document.getElementById('2408.15055v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The 25th IEEE International Conference on Information Reuse and Integration for Data Science (IRI 2024)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.14603">arXiv:2408.14603</a> <span> [<a href="https://arxiv.org/pdf/2408.14603">pdf</a>, <a href="https://arxiv.org/format/2408.14603">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Biased Dueling Bandits with Stochastic Delayed Feedback </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yi%2C+B">Bongsoo Yi</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yue Kang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yao Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.14603v1-abstract-short" style="display: inline;"> The dueling bandit problem, an essential variation of the traditional multi-armed bandit problem, has become significantly prominent recently due to its broad applications in online advertising, recommendation systems, information retrieval, and more. However, in many real-world applications, the feedback for actions is often subject to unavoidable delays and is not immediately available to the ag… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.14603v1-abstract-full').style.display = 'inline'; document.getElementById('2408.14603v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.14603v1-abstract-full" style="display: none;"> The dueling bandit problem, an essential variation of the traditional multi-armed bandit problem, has become significantly prominent recently due to its broad applications in online advertising, recommendation systems, information retrieval, and more. However, in many real-world applications, the feedback for actions is often subject to unavoidable delays and is not immediately available to the agent. This partially observable issue poses a significant challenge to existing dueling bandit literature, as it significantly affects how quickly and accurately the agent can update their policy on the fly. In this paper, we introduce and examine the biased dueling bandit problem with stochastic delayed feedback, revealing that this new practical problem will delve into a more realistic and intriguing scenario involving a preference bias between the selections. We present two algorithms designed to handle situations involving delay. Our first algorithm, requiring complete delay distribution information, achieves the optimal regret bound for the dueling bandit problem when there is no delay. The second algorithm is tailored for situations where the distribution is unknown, but only the expected value of delay is available. We provide a comprehensive regret analysis for the two proposed algorithms and then evaluate their empirical performance on both synthetic and real datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.14603v1-abstract-full').style.display = 'none'; document.getElementById('2408.14603v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.11868">arXiv:2408.11868</a> <span> [<a href="https://arxiv.org/pdf/2408.11868">pdf</a>, <a href="https://arxiv.org/format/2408.11868">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Improving embedding with contrastive fine-tuning on small datasets with expert-augmented scores </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lu%2C+J">Jun Lu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+D">David Li</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+B">Bill Ding</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yu Kang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.11868v1-abstract-short" style="display: inline;"> This paper presents an approach to improve text embedding models through contrastive fine-tuning on small datasets augmented with expert scores. It focuses on enhancing semantic textual similarity tasks and addressing text retrieval problems. The proposed method uses soft labels derived from expert-augmented scores to fine-tune embedding models, preserving their versatility and ensuring retrieval… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.11868v1-abstract-full').style.display = 'inline'; document.getElementById('2408.11868v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.11868v1-abstract-full" style="display: none;"> This paper presents an approach to improve text embedding models through contrastive fine-tuning on small datasets augmented with expert scores. It focuses on enhancing semantic textual similarity tasks and addressing text retrieval problems. The proposed method uses soft labels derived from expert-augmented scores to fine-tune embedding models, preserving their versatility and ensuring retrieval capability is improved. The paper evaluates the method using a Q\&A dataset from an online shopping website and eight expert models. Results show improved performance over a benchmark model across multiple metrics on various retrieval tasks from the massive text embedding benchmark (MTEB). The method is cost-effective and practical for real-world applications, especially when labeled data is scarce. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.11868v1-abstract-full').style.display = 'none'; document.getElementById('2408.11868v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.08021">arXiv:2408.08021</a> <span> [<a href="https://arxiv.org/pdf/2408.08021">pdf</a>, <a href="https://arxiv.org/format/2408.08021">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.18653/v1/2023.emnlp-main.601">10.18653/v1/2023.emnlp-main.601 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> DIVE: Towards Descriptive and Diverse Visual Commonsense Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Park%2C+J">Jun-Hyung Park</a>, <a href="/search/cs?searchtype=author&query=Park%2C+H">Hyuntae Park</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Youjin Kang</a>, <a href="/search/cs?searchtype=author&query=Jeon%2C+E">Eojin Jeon</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+S">SangKeun Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.08021v1-abstract-short" style="display: inline;"> Towards human-level visual understanding, visual commonsense generation has been introduced to generate commonsense inferences beyond images. However, current research on visual commonsense generation has overlooked an important human cognitive ability: generating descriptive and diverse inferences. In this work, we propose a novel visual commonsense generation framework, called DIVE, which aims t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.08021v1-abstract-full').style.display = 'inline'; document.getElementById('2408.08021v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.08021v1-abstract-full" style="display: none;"> Towards human-level visual understanding, visual commonsense generation has been introduced to generate commonsense inferences beyond images. However, current research on visual commonsense generation has overlooked an important human cognitive ability: generating descriptive and diverse inferences. In this work, we propose a novel visual commonsense generation framework, called DIVE, which aims to improve the descriptiveness and diversity of generated inferences. DIVE involves two methods, generic inference filtering and contrastive retrieval learning, which address the limitations of existing visual commonsense resources and training objectives. Experimental results verify that DIVE outperforms state-of-the-art models for visual commonsense generation in terms of both descriptiveness and diversity, while showing a superior quality in generating unique and novel inferences. Notably, DIVE achieves human-level descriptiveness and diversity on Visual Commonsense Graphs. Furthermore, human evaluations confirm that DIVE aligns closely with human judgments on descriptiveness and diversity\footnote{Our code and dataset are available at https://github.com/Park-ing-lot/DIVE. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.08021v1-abstract-full').style.display = 'none'; document.getElementById('2408.08021v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">19 pages, 10 figuers, EMNLP 2023 (main)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.21510">arXiv:2407.21510</a> <span> [<a href="https://arxiv.org/pdf/2407.21510">pdf</a>, <a href="https://arxiv.org/format/2407.21510">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> PEAR: Phrase-Based Hand-Object Interaction Anticipation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zichen Zhang</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Hongchen Luo</a>, <a href="/search/cs?searchtype=author&query=Zhai%2C+W">Wei Zhai</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+Y">Yang Cao</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yu Kang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.21510v1-abstract-short" style="display: inline;"> First-person hand-object interaction anticipation aims to predict the interaction process over a forthcoming period based on current scenes and prompts. This capability is crucial for embodied intelligence and human-robot collaboration. The complete interaction process involves both pre-contact interaction intention (i.e., hand motion trends and interaction hotspots) and post-contact interaction m… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.21510v1-abstract-full').style.display = 'inline'; document.getElementById('2407.21510v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.21510v1-abstract-full" style="display: none;"> First-person hand-object interaction anticipation aims to predict the interaction process over a forthcoming period based on current scenes and prompts. This capability is crucial for embodied intelligence and human-robot collaboration. The complete interaction process involves both pre-contact interaction intention (i.e., hand motion trends and interaction hotspots) and post-contact interaction manipulation (i.e., manipulation trajectories and hand poses with contact). Existing research typically anticipates only interaction intention while neglecting manipulation, resulting in incomplete predictions and an increased likelihood of intention errors due to the lack of manipulation constraints. To address this, we propose a novel model, PEAR (Phrase-Based Hand-Object Interaction Anticipation), which jointly anticipates interaction intention and manipulation. To handle uncertainties in the interaction process, we employ a twofold approach. Firstly, we perform cross-alignment of verbs, nouns, and images to reduce the diversity of hand movement patterns and object functional attributes, thereby mitigating intention uncertainty. Secondly, we establish bidirectional constraints between intention and manipulation using dynamic integration and residual connections, ensuring consistency among elements and thus overcoming manipulation uncertainty. To rigorously evaluate the performance of the proposed model, we collect a new task-relevant dataset, EGO-HOIP, with comprehensive annotations. Extensive experimental results demonstrate the superiority of our method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.21510v1-abstract-full').style.display = 'none'; document.getElementById('2407.21510v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">22 pages, 10 figures, 4 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.17839">arXiv:2407.17839</a> <span> [<a href="https://arxiv.org/pdf/2407.17839">pdf</a>, <a href="https://arxiv.org/format/2407.17839">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Long-term Fairness in Ride-Hailing Platform </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yufan Kang</a>, <a href="/search/cs?searchtype=author&query=Chan%2C+J">Jeffrey Chan</a>, <a href="/search/cs?searchtype=author&query=Shao%2C+W">Wei Shao</a>, <a href="/search/cs?searchtype=author&query=Salim%2C+F+D">Flora D. Salim</a>, <a href="/search/cs?searchtype=author&query=Leckie%2C+C">Christopher Leckie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.17839v1-abstract-short" style="display: inline;"> Matching in two-sided markets such as ride-hailing has recently received significant attention. However, existing studies on ride-hailing mainly focus on optimising efficiency, and fairness issues in ride-hailing have been neglected. Fairness issues in ride-hailing, including significant earning differences between drivers and variance of passenger waiting times among different locations, have pot… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.17839v1-abstract-full').style.display = 'inline'; document.getElementById('2407.17839v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.17839v1-abstract-full" style="display: none;"> Matching in two-sided markets such as ride-hailing has recently received significant attention. However, existing studies on ride-hailing mainly focus on optimising efficiency, and fairness issues in ride-hailing have been neglected. Fairness issues in ride-hailing, including significant earning differences between drivers and variance of passenger waiting times among different locations, have potential impacts on economic and ethical aspects. The recent studies that focus on fairness in ride-hailing exploit traditional optimisation methods and the Markov Decision Process to balance efficiency and fairness. However, there are several issues in these existing studies, such as myopic short-term decision-making from traditional optimisation and instability of fairness in a comparably longer horizon from both traditional optimisation and Markov Decision Process-based methods. To address these issues, we propose a dynamic Markov Decision Process model to alleviate fairness issues currently faced by ride-hailing, and seek a balance between efficiency and fairness, with two distinct characteristics: (i) a prediction module to predict the number of requests that will be raised in the future from different locations to allow the proposed method to consider long-term fairness based on the whole timeline instead of consider fairness only based on historical and current data patterns; (ii) a customised scalarisation function for multi-objective multi-agent Q Learning that aims to balance efficiency and fairness. Extensive experiments on a publicly available real-world dataset demonstrate that our proposed method outperforms existing state-of-the-art methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.17839v1-abstract-full').style.display = 'none'; document.getElementById('2407.17839v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ECML PKDD 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.14814">arXiv:2407.14814</a> <span> [<a href="https://arxiv.org/pdf/2407.14814">pdf</a>, <a href="https://arxiv.org/format/2407.14814">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> FMamba: Mamba based on Fast-attention for Multivariate Time-series Forecasting </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ma%2C+S">Shusen Ma</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yu Kang</a>, <a href="/search/cs?searchtype=author&query=Bai%2C+P">Peng Bai</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Y">Yun-Bo Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.14814v1-abstract-short" style="display: inline;"> In multivariate time-series forecasting (MTSF), extracting the temporal correlations of the input sequences is crucial. While popular Transformer-based predictive models can perform well, their quadratic computational complexity results in inefficiency and high overhead. The recently emerged Mamba, a selective state space model, has shown promising results in many fields due to its strong temporal… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.14814v1-abstract-full').style.display = 'inline'; document.getElementById('2407.14814v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.14814v1-abstract-full" style="display: none;"> In multivariate time-series forecasting (MTSF), extracting the temporal correlations of the input sequences is crucial. While popular Transformer-based predictive models can perform well, their quadratic computational complexity results in inefficiency and high overhead. The recently emerged Mamba, a selective state space model, has shown promising results in many fields due to its strong temporal feature extraction capabilities and linear computational complexity. However, due to the unilateral nature of Mamba, channel-independent predictive models based on Mamba cannot attend to the relationships among all variables in the manner of Transformer-based models. To address this issue, we combine fast-attention with Mamba to introduce a novel framework named FMamba for MTSF. Technically, we first extract the temporal features of the input variables through an embedding layer, then compute the dependencies among input variables via the fast-attention module. Subsequently, we use Mamba to selectively deal with the input features and further extract the temporal dependencies of the variables through the multi-layer perceptron block (MLP-block). Finally, FMamba obtains the predictive results through the projector, a linear layer. Experimental results on eight public datasets demonstrate that FMamba can achieve state-of-the-art performance while maintaining low computational overhead. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.14814v1-abstract-full').style.display = 'none'; document.getElementById('2407.14814v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.14295">arXiv:2407.14295</a> <span> [<a href="https://arxiv.org/pdf/2407.14295">pdf</a>, <a href="https://arxiv.org/format/2407.14295">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> CoVoSwitch: Machine Translation of Synthetic Code-Switched Text Based on Intonation Units </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yeeun Kang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.14295v1-abstract-short" style="display: inline;"> Multilingual code-switching research is often hindered by the lack and linguistically biased status of available datasets. To expand language representation, we synthesize code-switching data by replacing intonation units detected through PSST, a speech segmentation model fine-tuned from OpenAI's Whisper, using a speech-to-text translation dataset, CoVoST 2. With our dataset, CoVoSwitch, spanning… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.14295v1-abstract-full').style.display = 'inline'; document.getElementById('2407.14295v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.14295v1-abstract-full" style="display: none;"> Multilingual code-switching research is often hindered by the lack and linguistically biased status of available datasets. To expand language representation, we synthesize code-switching data by replacing intonation units detected through PSST, a speech segmentation model fine-tuned from OpenAI's Whisper, using a speech-to-text translation dataset, CoVoST 2. With our dataset, CoVoSwitch, spanning 13 languages, we evaluate the code-switching translation performance of two multilingual translation models, M2M-100 418M and NLLB-200 600M. We reveal that the inclusion of code-switching units results in higher translation performance than monolingual settings and that models are better at code-switching translation into English than non-English. Further, low-resource languages gain most from integration of code-switched units when translating into English but much less when translating into non-English. Translations into low-resource languages also perform worse than even raw code-switched inputs. We find that systems excel at copying English tokens but struggle with non-English tokens, that the off-target problem in monolingual settings is also relevant in code-switching settings, and that models hallucinate in code-switching translation by introducing words absent in both of the original source sentences. CoVoSwitch and code are available at https://github.com/sophiayk20/covoswitch. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.14295v1-abstract-full').style.display = 'none'; document.getElementById('2407.14295v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ACL 2024 Student Research Workshop (ACL-SRW 2024)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.12537">arXiv:2407.12537</a> <span> [<a href="https://arxiv.org/pdf/2407.12537">pdf</a>, <a href="https://arxiv.org/format/2407.12537">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Collaborative Fall Detection and Response using Wi-Fi Sensing and Mobile Companion Robot </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yunwang Chen</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yaozhong Kang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Z">Ziqi Zhao</a>, <a href="/search/cs?searchtype=author&query=Hong%2C+Y">Yue Hong</a>, <a href="/search/cs?searchtype=author&query=Meng%2C+L">Lingxiao Meng</a>, <a href="/search/cs?searchtype=author&query=Meng%2C+M+Q+-">Max Q. -H. Meng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.12537v1-abstract-short" style="display: inline;"> This paper presents a collaborative fall detection and response system integrating Wi-Fi sensing with robotic assistance. The proposed system leverages channel state information (CSI) disruptions caused by movements to detect falls in non-line-of-sight (NLOS) scenarios, offering non-intrusive monitoring. Besides, a companion robot is utilized to provide assistance capabilities to navigate and resp… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.12537v1-abstract-full').style.display = 'inline'; document.getElementById('2407.12537v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.12537v1-abstract-full" style="display: none;"> This paper presents a collaborative fall detection and response system integrating Wi-Fi sensing with robotic assistance. The proposed system leverages channel state information (CSI) disruptions caused by movements to detect falls in non-line-of-sight (NLOS) scenarios, offering non-intrusive monitoring. Besides, a companion robot is utilized to provide assistance capabilities to navigate and respond to incidents autonomously, improving efficiency in providing assistance in various environments. The experimental results demonstrate the effectiveness of the proposed system in detecting falls and responding effectively. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.12537v1-abstract-full').style.display = 'none'; document.getElementById('2407.12537v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Draft for the submission of Robio 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.11036">arXiv:2407.11036</a> <span> [<a href="https://arxiv.org/pdf/2407.11036">pdf</a>, <a href="https://arxiv.org/format/2407.11036">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Hybrid-Generative Diffusion Models for Attack-Oriented Twin Migration in Vehicular Metaverses </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yingkai Kang</a>, <a href="/search/cs?searchtype=author&query=Wen%2C+J">Jinbo Wen</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+J">Jiawen Kang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+T">Tao Zhang</a>, <a href="/search/cs?searchtype=author&query=Du%2C+H">Hongyang Du</a>, <a href="/search/cs?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+R">Rong Yu</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+S">Shengli Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.11036v1-abstract-short" style="display: inline;"> The vehicular metaverse is envisioned as a blended immersive domain that promises to bring revolutionary changes to the automotive industry. As a core component of vehicular metaverses, Vehicle Twins (VTs) are digital twins that cover the entire life cycle of vehicles, providing immersive virtual services for Vehicular Metaverse Users (VMUs). Vehicles with limited resources offload the computation… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.11036v1-abstract-full').style.display = 'inline'; document.getElementById('2407.11036v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.11036v1-abstract-full" style="display: none;"> The vehicular metaverse is envisioned as a blended immersive domain that promises to bring revolutionary changes to the automotive industry. As a core component of vehicular metaverses, Vehicle Twins (VTs) are digital twins that cover the entire life cycle of vehicles, providing immersive virtual services for Vehicular Metaverse Users (VMUs). Vehicles with limited resources offload the computationally intensive tasks of constructing and updating VTs to edge servers and migrate VTs between these servers, ensuring seamless and immersive experiences for VMUs. However, the high mobility of vehicles, uneven deployment of edge servers, and potential security threats pose challenges to achieving efficient and reliable VT migrations. To address these issues, we propose a secure and reliable VT migration framework in vehicular metaverses. Specifically, we design a two-layer trust evaluation model to comprehensively evaluate the reputation value of edge servers in the network communication and interaction layers. Then, we model the VT migration problem as a partially observable Markov decision process and design a hybrid-Generative Diffusion Model (GDM) algorithm based on deep reinforcement learning to generate optimal migration decisions by taking hybrid actions (i.e., continuous actions and discrete actions). Numerical results demonstrate that the hybrid-GDM algorithm outperforms the baseline algorithms, showing strong adaptability in various settings and highlighting the potential of the hybrid-GDM algorithm for addressing various optimization issues in vehicular metaverses. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.11036v1-abstract-full').style.display = 'none'; document.getElementById('2407.11036v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.08554">arXiv:2407.08554</a> <span> [<a href="https://arxiv.org/pdf/2407.08554">pdf</a>, <a href="https://arxiv.org/format/2407.08554">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Establishing Rigorous and Cost-effective Clinical Trials for Artificial Intelligence Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gao%2C+W">Wanling Gao</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yunyou Huang</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+D">Dandan Cui</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+Z">Zhuoming Yu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+W">Wenjing Liu</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+X">Xiaoshuang Liang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+J">Jiahui Zhao</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+J">Jiyue Xie</a>, <a href="/search/cs?searchtype=author&query=Li%2C+H">Hao Li</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+L">Li Ma</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+N">Ning Ye</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yumiao Kang</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+D">Dingfeng Luo</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+P">Peng Pan</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+W">Wei Huang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zhongmou Liu</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+J">Jizhong Hu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+G">Gangyuan Zhao</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+C">Chongrong Jiang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+F">Fan Huang</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+T">Tianyi Wei</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+S">Suqin Tang</a>, <a href="/search/cs?searchtype=author&query=Xia%2C+B">Bingjie Xia</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhifei Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhan%2C+J">Jianfeng Zhan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.08554v2-abstract-short" style="display: inline;"> A profound gap persists between artificial intelligence (AI) and clinical practice in medicine, primarily due to the lack of rigorous and cost-effective evaluation methodologies. State-of-the-art and state-of-the-practice AI model evaluations are limited to laboratory studies on medical datasets or direct clinical trials with no or solely patient-centered controls. Moreover, the crucial role of cl… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.08554v2-abstract-full').style.display = 'inline'; document.getElementById('2407.08554v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.08554v2-abstract-full" style="display: none;"> A profound gap persists between artificial intelligence (AI) and clinical practice in medicine, primarily due to the lack of rigorous and cost-effective evaluation methodologies. State-of-the-art and state-of-the-practice AI model evaluations are limited to laboratory studies on medical datasets or direct clinical trials with no or solely patient-centered controls. Moreover, the crucial role of clinicians in collaborating with AI, pivotal for determining its impact on clinical practice, is often overlooked. For the first time, we emphasize the critical necessity for rigorous and cost-effective evaluation methodologies for AI models in clinical practice, featuring patient/clinician-centered (dual-centered) AI randomized controlled trials (DC-AI RCTs) and virtual clinician-based in-silico trials (VC-MedAI) as an effective proxy for DC-AI RCTs. Leveraging 7500 diagnosis records from two-step inaugural DC-AI RCTs across 14 medical centers with 125 clinicians, our results demonstrate the necessity of DC-AI RCTs and the effectiveness of VC-MedAI. Notably, VC-MedAI performs comparably to human clinicians, replicating insights and conclusions from prospective DC-AI RCTs. We envision DC-AI RCTs and VC-MedAI as pivotal advancements, presenting innovative and transformative evaluation methodologies for AI models in clinical practice, offering a preclinical-like setting mirroring conventional medicine, and reshaping development paradigms in a cost-effective and fast-iterative manner. Chinese Clinical Trial Registration: ChiCTR2400086816. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.08554v2-abstract-full').style.display = 'none'; document.getElementById('2407.08554v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">24 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.07930">arXiv:2407.07930</a> <span> [<a href="https://arxiv.org/pdf/2407.07930">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Token-Mol 1.0: Tokenized drug design with large language model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jike Wang</a>, <a href="/search/cs?searchtype=author&query=Qin%2C+R">Rui Qin</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+M">Mingyang Wang</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+M">Meijing Fang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yangyang Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Y">Yuchen Zhu</a>, <a href="/search/cs?searchtype=author&query=Su%2C+Q">Qun Su</a>, <a href="/search/cs?searchtype=author&query=Gou%2C+Q">Qiaolin Gou</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+C">Chao Shen</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+O">Odin Zhang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z">Zhenxing Wu</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+D">Dejun Jiang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xujun Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+H">Huifeng Zhao</a>, <a href="/search/cs?searchtype=author&query=Wan%2C+X">Xiaozhe Wan</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z">Zhourui Wu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+L">Liwei Liu</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yu Kang</a>, <a href="/search/cs?searchtype=author&query=Hsieh%2C+C">Chang-Yu Hsieh</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+T">Tingjun Hou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.07930v2-abstract-short" style="display: inline;"> Significant interests have recently risen in leveraging sequence-based large language models (LLMs) for drug design. However, most current applications of LLMs in drug discovery lack the ability to comprehend three-dimensional (3D) structures, thereby limiting their effectiveness in tasks that explicitly involve molecular conformations. In this study, we introduced Token-Mol, a token-only 3D drug… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.07930v2-abstract-full').style.display = 'inline'; document.getElementById('2407.07930v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.07930v2-abstract-full" style="display: none;"> Significant interests have recently risen in leveraging sequence-based large language models (LLMs) for drug design. However, most current applications of LLMs in drug discovery lack the ability to comprehend three-dimensional (3D) structures, thereby limiting their effectiveness in tasks that explicitly involve molecular conformations. In this study, we introduced Token-Mol, a token-only 3D drug design model. This model encodes all molecular information, including 2D and 3D structures, as well as molecular property data, into tokens, which transforms classification and regression tasks in drug discovery into probabilistic prediction problems, thereby enabling learning through a unified paradigm. Token-Mol is built on the transformer decoder architecture and trained using random causal masking techniques. Additionally, we proposed the Gaussian cross-entropy (GCE) loss function to overcome the challenges in regression tasks, significantly enhancing the capacity of LLMs to learn continuous numerical values. Through a combination of fine-tuning and reinforcement learning (RL), Token-Mol achieves performance comparable to or surpassing existing task-specific methods across various downstream tasks, including pocket-based molecular generation, conformation generation, and molecular property prediction. Compared to existing molecular pre-trained models, Token-Mol exhibits superior proficiency in handling a wider range of downstream tasks essential for drug design. Notably, our approach improves regression task accuracy by approximately 30% compared to similar token-only methods. Token-Mol overcomes the precision limitations of token-only models and has the potential to integrate seamlessly with general models such as ChatGPT, paving the way for the development of a universal artificial intelligence drug design model that facilitates rapid and high-quality drug design by experts. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.07930v2-abstract-full').style.display = 'none'; document.getElementById('2407.07930v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.00118">arXiv:2407.00118</a> <span> [<a href="https://arxiv.org/pdf/2407.00118">pdf</a>, <a href="https://arxiv.org/format/2407.00118">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> From Efficient Multimodal Models to World Models: A Survey </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mai%2C+X">Xinji Mai</a>, <a href="/search/cs?searchtype=author&query=Tao%2C+Z">Zeng Tao</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+J">Junxiong Lin</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Haoran Wang</a>, <a href="/search/cs?searchtype=author&query=Chang%2C+Y">Yang Chang</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yanlan Kang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yan Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Wenqiang Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.00118v1-abstract-short" style="display: inline;"> Multimodal Large Models (MLMs) are becoming a significant research focus, combining powerful large language models with multimodal learning to perform complex tasks across different data modalities. This review explores the latest developments and challenges in MLMs, emphasizing their potential in achieving artificial general intelligence and as a pathway to world models. We provide an overview of… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.00118v1-abstract-full').style.display = 'inline'; document.getElementById('2407.00118v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.00118v1-abstract-full" style="display: none;"> Multimodal Large Models (MLMs) are becoming a significant research focus, combining powerful large language models with multimodal learning to perform complex tasks across different data modalities. This review explores the latest developments and challenges in MLMs, emphasizing their potential in achieving artificial general intelligence and as a pathway to world models. We provide an overview of key techniques such as Multimodal Chain of Thought (M-COT), Multimodal Instruction Tuning (M-IT), and Multimodal In-Context Learning (M-ICL). Additionally, we discuss both the fundamental and specific technologies of multimodal models, highlighting their applications, input/output modalities, and design characteristics. Despite significant advancements, the development of a unified multimodal model remains elusive. We discuss the integration of 3D generation and embodied intelligence to enhance world simulation capabilities and propose incorporating external rule systems for improved reasoning and decision-making. Finally, we outline future research directions to address these challenges and advance the field. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.00118v1-abstract-full').style.display = 'none'; document.getElementById('2407.00118v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.00113">arXiv:2407.00113</a> <span> [<a href="https://arxiv.org/pdf/2407.00113">pdf</a>, <a href="https://arxiv.org/format/2407.00113">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3637528.3671948">10.1145/3637528.3671948 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Personalized Federated Continual Learning via Multi-granularity Prompt </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yu%2C+H">Hao Yu</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+X">Xin Yang</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+X">Xin Gao</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yan Kang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hao Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Junbo Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+T">Tianrui Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.00113v1-abstract-short" style="display: inline;"> Personalized Federated Continual Learning (PFCL) is a new practical scenario that poses greater challenges in sharing and personalizing knowledge. PFCL not only relies on knowledge fusion for server aggregation at the global spatial-temporal perspective but also needs model improvement for each client according to the local requirements. Existing methods, whether in Personalized Federated Learning… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.00113v1-abstract-full').style.display = 'inline'; document.getElementById('2407.00113v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.00113v1-abstract-full" style="display: none;"> Personalized Federated Continual Learning (PFCL) is a new practical scenario that poses greater challenges in sharing and personalizing knowledge. PFCL not only relies on knowledge fusion for server aggregation at the global spatial-temporal perspective but also needs model improvement for each client according to the local requirements. Existing methods, whether in Personalized Federated Learning (PFL) or Federated Continual Learning (FCL), have overlooked the multi-granularity representation of knowledge, which can be utilized to overcome Spatial-Temporal Catastrophic Forgetting (STCF) and adopt generalized knowledge to itself by coarse-to-fine human cognitive mechanisms. Moreover, it allows more effectively to personalized shared knowledge, thus serving its own purpose. To this end, we propose a novel concept called multi-granularity prompt, i.e., coarse-grained global prompt acquired through the common model learning process, and fine-grained local prompt used to personalize the generalized representation. The former focuses on efficiently transferring shared global knowledge without spatial forgetting, and the latter emphasizes specific learning of personalized local knowledge to overcome temporal forgetting. In addition, we design a selective prompt fusion mechanism for aggregating knowledge of global prompts distilled from different clients. By the exclusive fusion of coarse-grained knowledge, we achieve the transmission and refinement of common knowledge among clients, further enhancing the performance of personalization. Extensive experiments demonstrate the effectiveness of the proposed method in addressing STCF as well as improving personalized performance. Our code now is available at https://github.com/SkyOfBeginning/FedMGP. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.00113v1-abstract-full').style.display = 'none'; document.getElementById('2407.00113v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by KDD 2024 Research Track</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.15097">arXiv:2406.15097</a> <span> [<a href="https://arxiv.org/pdf/2406.15097">pdf</a>, <a href="https://arxiv.org/format/2406.15097">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Modeling and Analysis of Application Interference on Dragonfly+ </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yao Kang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xin Wang</a>, <a href="/search/cs?searchtype=author&query=McGlohon%2C+N">Neil McGlohon</a>, <a href="/search/cs?searchtype=author&query=Mubarak%2C+M">Misbah Mubarak</a>, <a href="/search/cs?searchtype=author&query=Chunduri%2C+S">Sudheer Chunduri</a>, <a href="/search/cs?searchtype=author&query=Lan%2C+Z">Zhiling Lan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.15097v1-abstract-short" style="display: inline;"> Dragonfly class of networks are considered as promising interconnects for next-generation supercomputers. While Dragonfly+ networks offer more path diversity than the original Dragonfly design, they are still prone to performance variability due to their hierarchical architecture and resource sharing design. Event-driven network simulators are indispensable tools for navigating complex system desi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.15097v1-abstract-full').style.display = 'inline'; document.getElementById('2406.15097v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.15097v1-abstract-full" style="display: none;"> Dragonfly class of networks are considered as promising interconnects for next-generation supercomputers. While Dragonfly+ networks offer more path diversity than the original Dragonfly design, they are still prone to performance variability due to their hierarchical architecture and resource sharing design. Event-driven network simulators are indispensable tools for navigating complex system design. In this study, we quantitatively evaluate a variety of application communication interactions on a 3,456-node Dragonfly+ system by using the CODES toolkit. This study looks at the impact of communication interference from a user's perspective. Specifically, for a given application submitted by a user, we examine how this application will behave with the existing workload running in the system under different job placement policies. Our simulation study considers hundreds of experiment configurations including four target applications with representative communication patterns under a variety of network traffic conditions. Our study shows that intra-job interference can cause severe performance degradation for communication-intensive applications. Inter-job interference can generally be reduced for applications with one-to-one or one-to-many communication patterns through job isolation. Application with one-to-all communication pattern is resilient to network interference. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.15097v1-abstract-full').style.display = 'none'; document.getElementById('2406.15097v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by SIGSIM PADS 2019</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.12403">arXiv:2406.12403</a> <span> [<a href="https://arxiv.org/pdf/2406.12403">pdf</a>, <a href="https://arxiv.org/format/2406.12403">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> PDSS: A Privacy-Preserving Framework for Step-by-Step Distillation of Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Fan%2C+T">Tao Fan</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yan Kang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+W">Weijing Chen</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+H">Hanlin Gu</a>, <a href="/search/cs?searchtype=author&query=Song%2C+Y">Yuanfeng Song</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+L">Lixin Fan</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kai Chen</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Q">Qiang Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.12403v1-abstract-short" style="display: inline;"> In the context of real-world applications, leveraging large language models (LLMs) for domain-specific tasks often faces two major challenges: domain-specific knowledge privacy and constrained resources. To address these issues, we propose PDSS, a privacy-preserving framework for step-by-step distillation of LLMs. PDSS works on a server-client architecture, wherein client transmits perturbed promp… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.12403v1-abstract-full').style.display = 'inline'; document.getElementById('2406.12403v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.12403v1-abstract-full" style="display: none;"> In the context of real-world applications, leveraging large language models (LLMs) for domain-specific tasks often faces two major challenges: domain-specific knowledge privacy and constrained resources. To address these issues, we propose PDSS, a privacy-preserving framework for step-by-step distillation of LLMs. PDSS works on a server-client architecture, wherein client transmits perturbed prompts to the server's LLM for rationale generation. The generated rationales are then decoded by the client and used to enrich the training of task-specific small language model(SLM) within a multi-task learning paradigm. PDSS introduces two privacy protection strategies: the Exponential Mechanism Strategy and the Encoder-Decoder Strategy, balancing prompt privacy and rationale usability. Experiments demonstrate the effectiveness of PDSS in various text generation tasks, enabling the training of task-specific SLM with enhanced performance while prioritizing data privacy protection. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.12403v1-abstract-full').style.display = 'none'; document.getElementById('2406.12403v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.07362">arXiv:2406.07362</a> <span> [<a href="https://arxiv.org/pdf/2406.07362">pdf</a>, <a href="https://arxiv.org/format/2406.07362">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> AI.vs.Clinician: Unveiling Intricate Interactions Between AI and Clinicians through an Open-Access Database </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gao%2C+W">Wanling Gao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yuan Liu</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+Z">Zhuoming Yu</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+D">Dandan Cui</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+W">Wenjing Liu</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+X">Xiaoshuang Liang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+J">Jiahui Zhao</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+J">Jiyue Xie</a>, <a href="/search/cs?searchtype=author&query=Li%2C+H">Hao Li</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+L">Li Ma</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+N">Ning Ye</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yumiao Kang</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+D">Dingfeng Luo</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+P">Peng Pan</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+W">Wei Huang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zhongmou Liu</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+J">Jizhong Hu</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+F">Fan Huang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+G">Gangyuan Zhao</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+C">Chongrong Jiang</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+T">Tianyi Wei</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhifei Zhang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yunyou Huang</a>, <a href="/search/cs?searchtype=author&query=Zhan%2C+J">Jianfeng Zhan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.07362v3-abstract-short" style="display: inline;"> Artificial Intelligence (AI) plays a crucial role in medical field and has the potential to revolutionize healthcare practices. However, the success of AI models and their impacts hinge on the synergy between AI and medical specialists, with clinicians assuming a dominant role. Unfortunately, the intricate dynamics and interactions between AI and clinicians remain undiscovered and thus hinder AI f… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.07362v3-abstract-full').style.display = 'inline'; document.getElementById('2406.07362v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.07362v3-abstract-full" style="display: none;"> Artificial Intelligence (AI) plays a crucial role in medical field and has the potential to revolutionize healthcare practices. However, the success of AI models and their impacts hinge on the synergy between AI and medical specialists, with clinicians assuming a dominant role. Unfortunately, the intricate dynamics and interactions between AI and clinicians remain undiscovered and thus hinder AI from being translated into medical practice. To address this gap, we have curated a groundbreaking database called AI.vs.Clinician. This database is the first of its kind for studying the interactions between AI and clinicians. It derives from 7,500 collaborative diagnosis records on a life-threatening medical emergency -- Sepsis -- from 14 medical centers across China. For the patient cohorts well-chosen from MIMIC databases, the AI-related information comprises the model property, feature input, diagnosis decision, and inferred probabilities of sepsis onset presently and within next three hours. The clinician-related information includes the viewed examination data and sequence, viewed time, preliminary and final diagnosis decisions with or without AI assistance, and recommended treatment. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.07362v3-abstract-full').style.display = 'none'; document.getElementById('2406.07362v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.04100">arXiv:2406.04100</a> <span> [<a href="https://arxiv.org/pdf/2406.04100">pdf</a>, <a href="https://arxiv.org/format/2406.04100">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Class-Aware Cartilage Segmentation for Autonomous US-CT Registration in Robotic Intercostal Ultrasound Imaging </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jiang%2C+Z">Zhongliang Jiang</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yunfeng Kang</a>, <a href="/search/cs?searchtype=author&query=Bi%2C+Y">Yuan Bi</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xuesong Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+C">Chenyang Li</a>, <a href="/search/cs?searchtype=author&query=Navab%2C+N">Nassir Navab</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.04100v1-abstract-short" style="display: inline;"> Ultrasound imaging has been widely used in clinical examinations owing to the advantages of being portable, real-time, and radiation-free. Considering the potential of extensive deployment of autonomous examination systems in hospitals, robotic US imaging has attracted increased attention. However, due to the inter-patient variations, it is still challenging to have an optimal path for each patien… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.04100v1-abstract-full').style.display = 'inline'; document.getElementById('2406.04100v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.04100v1-abstract-full" style="display: none;"> Ultrasound imaging has been widely used in clinical examinations owing to the advantages of being portable, real-time, and radiation-free. Considering the potential of extensive deployment of autonomous examination systems in hospitals, robotic US imaging has attracted increased attention. However, due to the inter-patient variations, it is still challenging to have an optimal path for each patient, particularly for thoracic applications with limited acoustic windows, e.g., intercostal liver imaging. To address this problem, a class-aware cartilage bone segmentation network with geometry-constraint post-processing is presented to capture patient-specific rib skeletons. Then, a dense skeleton graph-based non-rigid registration is presented to map the intercostal scanning path from a generic template to individual patients. By explicitly considering the high-acoustic impedance bone structures, the transferred scanning path can be precisely located in the intercostal space, enhancing the visibility of internal organs by reducing the acoustic shadow. To evaluate the proposed approach, the final path mapping performance is validated on five distinct CTs and two volunteer US data, resulting in ten pairs of CT-US combinations. Results demonstrate that the proposed graph-based registration method can robustly and precisely map the path from CT template to individual patients (Euclidean error: $2.21\pm1.11~mm$). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.04100v1-abstract-full').style.display = 'none'; document.getElementById('2406.04100v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.04035">arXiv:2406.04035</a> <span> [<a href="https://arxiv.org/pdf/2406.04035">pdf</a>, <a href="https://arxiv.org/format/2406.04035">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> STEMO: Early Spatio-temporal Forecasting with Multi-Objective Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shao%2C+W">Wei Shao</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yufan Kang</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+Z">Ziyan Peng</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+X">Xiao Xiao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+L">Lei Wang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yuhui Yang</a>, <a href="/search/cs?searchtype=author&query=Salim%2C+F+D">Flora D Salim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.04035v3-abstract-short" style="display: inline;"> Accuracy and timeliness are indeed often conflicting goals in prediction tasks. Premature predictions may yield a higher rate of false alarms, whereas delaying predictions to gather more information can render them too late to be useful. In applications such as wildfires, crimes, and traffic jams, timely forecasting are vital for safeguarding human life and property. Consequently, finding a balanc… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.04035v3-abstract-full').style.display = 'inline'; document.getElementById('2406.04035v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.04035v3-abstract-full" style="display: none;"> Accuracy and timeliness are indeed often conflicting goals in prediction tasks. Premature predictions may yield a higher rate of false alarms, whereas delaying predictions to gather more information can render them too late to be useful. In applications such as wildfires, crimes, and traffic jams, timely forecasting are vital for safeguarding human life and property. Consequently, finding a balance between accuracy and timeliness is crucial. In this paper, we propose an early spatio-temporal forecasting model based on Multi-Objective reinforcement learning that can either implement an optimal policy given a preference or infer the preference based on a small number of samples. The model addresses two primary challenges: 1) enhancing the accuracy of early forecasting and 2) providing the optimal policy for determining the most suitable prediction time for each area. Our method demonstrates superior performance on three large-scale real-world datasets, surpassing existing methods in early spatio-temporal forecasting tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.04035v3-abstract-full').style.display = 'none'; document.getElementById('2406.04035v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted paper in KDD 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.02224">arXiv:2406.02224</a> <span> [<a href="https://arxiv.org/pdf/2406.02224">pdf</a>, <a href="https://arxiv.org/format/2406.02224">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> FedMKT: Federated Mutual Knowledge Transfer for Large and Small Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Fan%2C+T">Tao Fan</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+G">Guoqiang Ma</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yan Kang</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+H">Hanlin Gu</a>, <a href="/search/cs?searchtype=author&query=Song%2C+Y">Yuanfeng Song</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+L">Lixin Fan</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kai Chen</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Q">Qiang Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.02224v2-abstract-short" style="display: inline;"> Recent research in federated large language models (LLMs) has primarily focused on enabling clients to fine-tune their locally deployed homogeneous LLMs collaboratively or on transferring knowledge from server-based LLMs to small language models (SLMs) at downstream clients. However, a significant gap remains in the simultaneous mutual enhancement of both the server's LLM and clients' SLMs. To bri… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.02224v2-abstract-full').style.display = 'inline'; document.getElementById('2406.02224v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.02224v2-abstract-full" style="display: none;"> Recent research in federated large language models (LLMs) has primarily focused on enabling clients to fine-tune their locally deployed homogeneous LLMs collaboratively or on transferring knowledge from server-based LLMs to small language models (SLMs) at downstream clients. However, a significant gap remains in the simultaneous mutual enhancement of both the server's LLM and clients' SLMs. To bridge this gap, we propose FedMKT, a parameter-efficient federated mutual knowledge transfer framework for large and small language models. This framework is designed to adaptively transfer knowledge from the server's LLM to clients' SLMs while concurrently enriching the LLM with clients' unique domain insights. We facilitate token alignment using minimum edit distance (MinED) and then selective mutual knowledge transfer between client-side SLMs and a server-side LLM, aiming to collectively enhance their performance. Through extensive experiments across three distinct scenarios, we evaluate the effectiveness of FedMKT using various public LLMs and SLMs on a range of NLP text generation tasks. Empirical results demonstrate that FedMKT simultaneously boosts the performance of both LLMs and SLMs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.02224v2-abstract-full').style.display = 'none'; document.getElementById('2406.02224v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.01085">arXiv:2406.01085</a> <span> [<a href="https://arxiv.org/pdf/2406.01085">pdf</a>, <a href="https://arxiv.org/format/2406.01085">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> FedAdOb: Privacy-Preserving Federated Deep Learning with Adaptive Obfuscation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gu%2C+H">Hanlin Gu</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+J">Jiahuan Luo</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yan Kang</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+Y">Yuan Yao</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+G">Gongxi Zhu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+B">Bowen Li</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+L">Lixin Fan</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Q">Qiang Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.01085v1-abstract-short" style="display: inline;"> Federated learning (FL) has emerged as a collaborative approach that allows multiple clients to jointly learn a machine learning model without sharing their private data. The concern about privacy leakage, albeit demonstrated under specific conditions, has triggered numerous follow-up research in designing powerful attacking methods and effective defending mechanisms aiming to thwart these attacki… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.01085v1-abstract-full').style.display = 'inline'; document.getElementById('2406.01085v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.01085v1-abstract-full" style="display: none;"> Federated learning (FL) has emerged as a collaborative approach that allows multiple clients to jointly learn a machine learning model without sharing their private data. The concern about privacy leakage, albeit demonstrated under specific conditions, has triggered numerous follow-up research in designing powerful attacking methods and effective defending mechanisms aiming to thwart these attacking methods. Nevertheless, privacy-preserving mechanisms employed in these defending methods invariably lead to compromised model performances due to a fixed obfuscation applied to private data or gradients. In this article, we, therefore, propose a novel adaptive obfuscation mechanism, coined FedAdOb, to protect private data without yielding original model performances. Technically, FedAdOb utilizes passport-based adaptive obfuscation to ensure data privacy in both horizontal and vertical federated learning settings. The privacy-preserving capabilities of FedAdOb, specifically with regard to private features and labels, are theoretically proven through Theorems 1 and 2. Furthermore, extensive experimental evaluations conducted on various datasets and network architectures demonstrate the effectiveness of FedAdOb by manifesting its superior trade-off between privacy preservation and model performance, surpassing existing methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.01085v1-abstract-full').style.display = 'none'; document.getElementById('2406.01085v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.00195">arXiv:2406.00195</a> <span> [<a href="https://arxiv.org/pdf/2406.00195">pdf</a>, <a href="https://arxiv.org/format/2406.00195">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> SNED: Superposition Network Architecture Search for Efficient Video Diffusion Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhengang Li</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yan Kang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yuchen Liu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+D">Difan Liu</a>, <a href="/search/cs?searchtype=author&query=Hinz%2C+T">Tobias Hinz</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+F">Feng Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yanzhi Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.00195v1-abstract-short" style="display: inline;"> While AI-generated content has garnered significant attention, achieving photo-realistic video synthesis remains a formidable challenge. Despite the promising advances in diffusion models for video generation quality, the complex model architecture and substantial computational demands for both training and inference create a significant gap between these models and real-world applications. This p… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.00195v1-abstract-full').style.display = 'inline'; document.getElementById('2406.00195v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.00195v1-abstract-full" style="display: none;"> While AI-generated content has garnered significant attention, achieving photo-realistic video synthesis remains a formidable challenge. Despite the promising advances in diffusion models for video generation quality, the complex model architecture and substantial computational demands for both training and inference create a significant gap between these models and real-world applications. This paper presents SNED, a superposition network architecture search method for efficient video diffusion model. Our method employs a supernet training paradigm that targets various model cost and resolution options using a weight-sharing method. Moreover, we propose the supernet training sampling warm-up for fast training optimization. To showcase the flexibility of our method, we conduct experiments involving both pixel-space and latent-space video diffusion models. The results demonstrate that our framework consistently produces comparable results across different model options with high efficiency. According to the experiment for the pixel-space video diffusion model, we can achieve consistent video generation results simultaneously across 64 x 64 to 256 x 256 resolutions with a large range of model sizes from 640M to 1.6B number of parameters for pixel-space video diffusion models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.00195v1-abstract-full').style.display = 'none'; document.getElementById('2406.00195v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted in CVPR 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.20681">arXiv:2405.20681</a> <span> [<a href="https://arxiv.org/pdf/2405.20681">pdf</a>, <a href="https://arxiv.org/format/2405.20681">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> No Free Lunch Theorem for Privacy-Preserving LLM Inference </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xiaojin Zhang</a>, <a href="/search/cs?searchtype=author&query=Fei%2C+Y">Yulin Fei</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yan Kang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+W">Wei Chen</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+L">Lixin Fan</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+H">Hai Jin</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Q">Qiang Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.20681v1-abstract-short" style="display: inline;"> Individuals and businesses have been significantly benefited by Large Language Models (LLMs) including PaLM, Gemini and ChatGPT in various ways. For example, LLMs enhance productivity, reduce costs, and enable us to focus on more valuable tasks. Furthermore, LLMs possess the capacity to sift through extensive datasets, uncover underlying patterns, and furnish critical insights that propel the fron… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.20681v1-abstract-full').style.display = 'inline'; document.getElementById('2405.20681v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.20681v1-abstract-full" style="display: none;"> Individuals and businesses have been significantly benefited by Large Language Models (LLMs) including PaLM, Gemini and ChatGPT in various ways. For example, LLMs enhance productivity, reduce costs, and enable us to focus on more valuable tasks. Furthermore, LLMs possess the capacity to sift through extensive datasets, uncover underlying patterns, and furnish critical insights that propel the frontiers of technology and science. However, LLMs also pose privacy concerns. Users' interactions with LLMs may expose their sensitive personal or company information. A lack of robust privacy safeguards and legal frameworks could permit the unwarranted intrusion or improper handling of individual data, thereby risking infringements of privacy and the theft of personal identities. To ensure privacy, it is essential to minimize the dependency between shared prompts and private information. Various randomization approaches have been proposed to protect prompts' privacy, but they may incur utility loss compared to unprotected LLMs prompting. Therefore, it is essential to evaluate the balance between the risk of privacy leakage and loss of utility when conducting effective protection mechanisms. The current study develops a framework for inferring privacy-protected Large Language Models (LLMs) and lays down a solid theoretical basis for examining the interplay between privacy preservation and utility. The core insight is encapsulated within a theorem that is called as the NFL (abbreviation of the word No-Free-Lunch) Theorem. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.20681v1-abstract-full').style.display = 'none'; document.getElementById('2405.20681v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.19184">arXiv:2405.19184</a> <span> [<a href="https://arxiv.org/pdf/2405.19184">pdf</a>, <a href="https://arxiv.org/format/2405.19184">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3638529.3654207">10.1145/3638529.3654207 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Promoting Two-sided Fairness in Dynamic Vehicle Routing Problem </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yufan Kang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Rongsheng Zhang</a>, <a href="/search/cs?searchtype=author&query=Shao%2C+W">Wei Shao</a>, <a href="/search/cs?searchtype=author&query=Salim%2C+F+D">Flora D. Salim</a>, <a href="/search/cs?searchtype=author&query=Chan%2C+J">Jeffrey Chan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.19184v1-abstract-short" style="display: inline;"> Dynamic Vehicle Routing Problem (DVRP), is an extension of the classic Vehicle Routing Problem (VRP), which is a fundamental problem in logistics and transportation. Typically, DVRPs involve two stakeholders: service providers that deliver services to customers and customers who raise requests from different locations. Many real-world applications can be formulated as DVRP such as ridesharing and… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.19184v1-abstract-full').style.display = 'inline'; document.getElementById('2405.19184v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.19184v1-abstract-full" style="display: none;"> Dynamic Vehicle Routing Problem (DVRP), is an extension of the classic Vehicle Routing Problem (VRP), which is a fundamental problem in logistics and transportation. Typically, DVRPs involve two stakeholders: service providers that deliver services to customers and customers who raise requests from different locations. Many real-world applications can be formulated as DVRP such as ridesharing and non-compliance capture. Apart from original objectives like optimising total utility or efficiency, DVRP should also consider fairness for all parties. Unfairness can induce service providers and customers to give up on the systems, leading to negative financial and social impacts. However, most existing DVRP-related applications focus on improving fairness from a single side, and there have been few works considering two-sided fairness and utility optimisation concurrently. To this end, we propose a novel framework, a Two-sided Fairness-aware Genetic Algorithm (named 2FairGA), which expands the genetic algorithm from the original objective solely focusing on utility to multi-objectives that incorporate two-sided fairness. Subsequently, the impact of injecting two fairness definitions into the utility-focused model and the correlation between any pair of the three objectives are explored. Extensive experiments demonstrate the superiority of our proposed framework compared to the state-of-the-art. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.19184v1-abstract-full').style.display = 'none'; document.getElementById('2405.19184v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.17830">arXiv:2405.17830</a> <span> [<a href="https://arxiv.org/pdf/2405.17830">pdf</a>, <a href="https://arxiv.org/format/2405.17830">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> More Than Catastrophic Forgetting: Integrating General Capabilities For Domain-Specific LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+C">Chengyuan Liu</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yangyang Kang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shihang Wang</a>, <a href="/search/cs?searchtype=author&query=Qing%2C+L">Lizhi Qing</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+F">Fubang Zhao</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+C">Changlong Sun</a>, <a href="/search/cs?searchtype=author&query=Kuang%2C+K">Kun Kuang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+F">Fei Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.17830v2-abstract-short" style="display: inline;"> The performance on general tasks decreases after Large Language Models (LLMs) are fine-tuned on domain-specific tasks, the phenomenon is known as Catastrophic Forgetting (CF). However, this paper presents a further challenge for real application of domain-specific LLMs beyond CF, called General Capabilities Integration (GCI), which necessitates the integration of both the general capabilities and… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.17830v2-abstract-full').style.display = 'inline'; document.getElementById('2405.17830v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.17830v2-abstract-full" style="display: none;"> The performance on general tasks decreases after Large Language Models (LLMs) are fine-tuned on domain-specific tasks, the phenomenon is known as Catastrophic Forgetting (CF). However, this paper presents a further challenge for real application of domain-specific LLMs beyond CF, called General Capabilities Integration (GCI), which necessitates the integration of both the general capabilities and domain knowledge within a single instance. The objective of GCI is not merely to retain previously acquired general capabilities alongside new domain knowledge, but to harmonize and utilize both sets of skills in a cohesive manner to enhance performance on domain-specific tasks. Taking legal domain as an example, we carefully design three groups of training and testing tasks without lacking practicability, and construct the corresponding datasets. To better incorporate general capabilities across domain-specific scenarios, we introduce ALoRA, which utilizes a multi-head attention module upon LoRA, facilitating direct information transfer from preceding tokens to the current one. This enhancement permits the representation to dynamically switch between domain-specific knowledge and general competencies according to the attention. Extensive experiments are conducted on the proposed tasks. The results exhibit the significance of our setting, and the effectiveness of our method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.17830v2-abstract-full').style.display = 'none'; document.getElementById('2405.17830v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by EMNLP 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.17234">arXiv:2405.17234</a> <span> [<a href="https://arxiv.org/pdf/2405.17234">pdf</a>, <a href="https://arxiv.org/format/2405.17234">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Benchmarking General-Purpose In-Context Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+F">Fan Wang</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+C">Chuan Lin</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+Y">Yang Cao</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yu Kang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.17234v6-abstract-short" style="display: inline;"> In-context learning (ICL) empowers generative models to address new tasks effectively and efficiently on the fly, without relying on any artificially crafted optimization techniques. In this paper, we study extending ICL to address a broader range of tasks with an extended learning horizon and higher improvement potential, namely General Purpose In-Context Learning (GPICL). To this end, we introdu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.17234v6-abstract-full').style.display = 'inline'; document.getElementById('2405.17234v6-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.17234v6-abstract-full" style="display: none;"> In-context learning (ICL) empowers generative models to address new tasks effectively and efficiently on the fly, without relying on any artificially crafted optimization techniques. In this paper, we study extending ICL to address a broader range of tasks with an extended learning horizon and higher improvement potential, namely General Purpose In-Context Learning (GPICL). To this end, we introduce two lightweight benchmarks specifically crafted to train and evaluate GPICL functionalities. Each benchmark encompasses a vast number of tasks characterized by significant task variance. These tasks are also crafted to promote long-horizon in-context learning through continuous generation and interaction, covering domains such as language modeling, decision-making, and world modeling. The benchmarks necessitate the models to leverage contexts and history interactions to enhance their capabilities, which we believe to be the key characteristics of GPICL. Our experiments indicate that the diversity of training tasks is positively correlated with the ability to generalize with ICL, but inversely correlated with zero-shot capabilities. Additionally, our findings indicate that the scale of parameters alone may not be crucial for ICL or GPICL, suggesting alternative approaches such as increasing the scale of contexts and memory states. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.17234v6-abstract-full').style.display = 'none'; document.getElementById('2405.17234v6-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.11802">arXiv:2405.11802</a> <span> [<a href="https://arxiv.org/pdf/2405.11802">pdf</a>, <a href="https://arxiv.org/format/2405.11802">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Counterfactual Explanation-Based Badminton Motion Guidance Generation Using Wearable Sensors </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Seong%2C+M">Minwoo Seong</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+G">Gwangbin Kim</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yumin Kang</a>, <a href="/search/cs?searchtype=author&query=Jang%2C+J">Junhyuk Jang</a>, <a href="/search/cs?searchtype=author&query=DelPreto%2C+J">Joseph DelPreto</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+S">SeungJun Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.11802v1-abstract-short" style="display: inline;"> This study proposes a framework for enhancing the stroke quality of badminton players by generating personalized motion guides, utilizing a multimodal wearable dataset. These guides are based on counterfactual algorithms and aim to reduce the performance gap between novice and expert players. Our approach provides joint-level guidance through visualizable data to assist players in improving their… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.11802v1-abstract-full').style.display = 'inline'; document.getElementById('2405.11802v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.11802v1-abstract-full" style="display: none;"> This study proposes a framework for enhancing the stroke quality of badminton players by generating personalized motion guides, utilizing a multimodal wearable dataset. These guides are based on counterfactual algorithms and aim to reduce the performance gap between novice and expert players. Our approach provides joint-level guidance through visualizable data to assist players in improving their movements without requiring expert knowledge. The method was evaluated against a traditional algorithm using metrics to assess validity, proximity, and plausibility, including arithmetic measures and motion-specific evaluation metrics. Our evaluation demonstrates that the proposed framework can generate motions that maintain the essence of original movements while enhancing stroke quality, providing closer guidance than direct expert motion replication. The results highlight the potential of our approach for creating personalized sports motion guides by generating counterfactual motion guidance for arbitrary input motion samples of badminton strokes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.11802v1-abstract-full').style.display = 'none'; document.getElementById('2405.11802v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICRA Wearable Workshop 2024 - 1st Workshop on Advancing Wearable Devices and Applications through Novel Design, Sensing, Actuation, and AI</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Kang%2C+Y&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Kang%2C+Y&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Kang%2C+Y&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Kang%2C+Y&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Kang%2C+Y&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Kang%2C+Y&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li> <a href="/search/?searchtype=author&query=Kang%2C+Y&start=250" class="pagination-link " aria-label="Page 6" aria-current="page">6 </a> </li> </ul> </nav> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository