
Search | arXiv e-print repository

<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 109 results for author: <span class="mathjax">Song, T</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Song%2C+T">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Song, T"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Song%2C+T&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Song, T"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Song%2C+T&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Song%2C+T&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Song%2C+T&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Song%2C+T&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06376">arXiv:2411.06376</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.06376">pdf</a>, <a href="https://arxiv.org/format/2411.06376">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> </div> </div> <p class="title is-5 mathjax"> Phantom: Constraining Generative Artificial Intelligence Models for Practical Domain Specific Peripherals Trace Synthesizing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Huang%2C+Z">Zhibai Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Shen%2C+Y">Yihan Shen</a>, <a href="/search/cs?searchtype=author&amp;query=Xie%2C+Y">Yongchen Xie</a>, <a href="/search/cs?searchtype=author&amp;query=Wei%2C+Z">Zhixiang Wei</a>, <a href="/search/cs?searchtype=author&amp;query=wang%2C+Y">Yun wang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+F">Fangxin Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+T">Tao Song</a>, <a href="/search/cs?searchtype=author&amp;query=Qi%2C+Z">Zhengwei Qi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06376v1-abstract-short" style="display: inline;"> Peripheral Component Interconnect Express (PCIe) is the de facto interconnect standard for high-speed peripherals and CPUs. Prototyping and optimizing PCIe devices for emerging scenarios is an ongoing challenge. Since Transaction Layer Packets (TLPs) capture device-CPU interactions, it is crucial to analyze and generate realistic TLP traces for effective device design and optimization. 
   Abstract: Peripheral Component Interconnect Express (PCIe) is the de facto interconnect standard for high-speed peripherals and CPUs. Prototyping and optimizing PCIe devices for emerging scenarios is an ongoing challenge. Since Transaction Layer Packets (TLPs) capture device-CPU interactions, it is crucial to analyze and generate realistic TLP traces for effective device design and optimization. Generative AI offers a promising approach for creating intricate, custom TLP traces necessary for PCIe hardware and software development. However, existing models often generate impractical traces due to the absence of PCIe-specific constraints, such as TLP ordering and causality. This paper presents Phantom, the first framework that treats TLP trace generation as a generative AI problem while incorporating PCIe-specific constraints. We validate Phantom's effectiveness by generating TLP traces for an actual PCIe network interface card. Experimental results show that Phantom produces practical, large-scale TLP traces, significantly outperforming existing models, with improvements of up to 1000$\times$ in task-specific metrics and up to 2.19$\times$ in Frechet Inception Distance (FID) compared to backbone-only methods.
   Submitted 10 November, 2024; originally announced November 2024.
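
   As an aside on the kind of PCIe-specific constraint the Phantom abstract refers to, the sketch below (Python; the field names and the rejection-sampling wrapper are illustrative assumptions, not the paper's mechanism) checks one simple causality rule: a completion TLP must follow a matching outstanding request.

      # Illustrative causality check for generated TLP traces; 'type' and 'tag'
      # are hypothetical trace fields, not Phantom's actual schema.
      def violates_causality(trace):
          outstanding = set()                  # tags of requests awaiting completion
          for tlp in trace:
              if tlp["type"] == "MRd":         # memory read request
                  outstanding.add(tlp["tag"])
              elif tlp["type"] == "CplD":      # completion with data
                  if tlp["tag"] not in outstanding:
                      return True              # completion precedes its request
                  outstanding.discard(tlp["tag"])
          return False

      def sample_valid_trace(generate, max_tries=100):
          # Naive rejection sampling around any generative model's sampler.
          for _ in range(max_tries):
              trace = generate()
              if not violates_causality(trace):
                  return trace
          raise RuntimeError("no valid trace generated")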
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.04578">arXiv:2411.04578</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.04578">pdf</a>, <a href="https://arxiv.org/format/2411.04578">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Multi-Agents are Social Groups: Investigating Social Influence of Multiple Agents in Human-Agent Interactions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Song%2C+T">Tianqi Song</a>, <a href="/search/cs?searchtype=author&amp;query=Tan%2C+Y">Yugin Tan</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+Z">Zicheng Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Feng%2C+Y">Yibin Feng</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+Y">Yi-Chieh Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.04578v1-abstract-short" style="display: inline;"> Multi-agent systems - systems with multiple independent AI agents working together to achieve a common goal - are becoming increasingly prevalent in daily life. Drawing inspiration from the phenomenon of human group social influence, we investigate whether a group of AI agents can create social pressure on users to agree with them, potentially changing their stance on a topic. We conducted a study&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04578v1-abstract-full').style.display = 'inline'; document.getElementById('2411.04578v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.04578v1-abstract-full" style="display: none;"> Multi-agent systems - systems with multiple independent AI agents working together to achieve a common goal - are becoming increasingly prevalent in daily life. Drawing inspiration from the phenomenon of human group social influence, we investigate whether a group of AI agents can create social pressure on users to agree with them, potentially changing their stance on a topic. We conducted a study in which participants discussed social issues with either a single or multiple AI agents, and where the agents either agreed or disagreed with the user&#39;s stance on the topic. We found that conversing with multiple agents (holding conversation content constant) increased the social pressure felt by participants, and caused a greater shift in opinion towards the agents&#39; stances on each topic. Our study shows the potential advantages of multi-agent systems over single-agent platforms in causing opinion change. We discuss design implications for possible multi-agent systems that promote social good, as well as the potential for malicious actors to use these systems to manipulate public opinion. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04578v1-abstract-full').style.display = 'none'; document.getElementById('2411.04578v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.22655">arXiv:2410.22655</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2410.22655">pdf</a>, <a href="https://arxiv.org/format/2410.22655">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> FlowDCN: Exploring DCN-like Architectures for Fast Image Generation with Arbitrary Resolution </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+S">Shuai Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Z">Zexian Li</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+T">Tianhui Song</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+X">Xubin Li</a>, <a href="/search/cs?searchtype=author&amp;query=Ge%2C+T">Tiezheng Ge</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+B">Bo Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+L">Limin Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.22655v1-abstract-short" style="display: inline;"> Arbitrary-resolution image generation still remains a challenging task in AIGC, as it requires handling varying resolutions and aspect ratios while maintaining high visual quality. Existing transformer-based diffusion methods suffer from quadratic computation cost and limited resolution extrapolation capabilities, making them less effective for this task. In this paper, we propose FlowDCN, a purel&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22655v1-abstract-full').style.display = 'inline'; document.getElementById('2410.22655v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.22655v1-abstract-full" style="display: none;"> Arbitrary-resolution image generation still remains a challenging task in AIGC, as it requires handling varying resolutions and aspect ratios while maintaining high visual quality. Existing transformer-based diffusion methods suffer from quadratic computation cost and limited resolution extrapolation capabilities, making them less effective for this task. In this paper, we propose FlowDCN, a purely convolution-based generative model with linear time and memory complexity, that can efficiently generate high-quality images at arbitrary resolutions. Equipped with a new design of learnable group-wise deformable convolution block, our FlowDCN yields higher flexibility and capability to handle different resolutions with a single model. 
   FlowDCN achieves the state-of-the-art 4.30 sFID on the $256\times256$ ImageNet benchmark and comparable resolution extrapolation results, surpassing transformer-based counterparts in terms of convergence speed (only $\frac{1}{5}$ of the images), visual quality, parameters ($8\%$ reduction) and FLOPs ($20\%$ reduction). We believe FlowDCN offers a promising solution to scalable and flexible image synthesis.
   Submitted 29 October, 2024; originally announced October 2024.
   Comments: Accepted on NeurIPS24

4. arXiv:2410.16144 [pdf, other]  cs.CL
   1-bit AI Infra: Part 1.1, Fast and Lossless BitNet b1.58 Inference on CPUs
   Authors: Jinheng Wang, Hansong Zhou, Ting Song, Shaoguang Mao, Shuming Ma, Hongyu Wang, Yan Xia, Furu Wei
   Abstract: Recent advances in 1-bit Large Language Models (LLMs), such as BitNet and BitNet b1.58, present a promising approach to enhancing the efficiency of LLMs in terms of speed and energy consumption. These developments also enable local LLM deployment across a broad range of devices.
   In this work, we introduce bitnet.cpp, a tailored software stack designed to unlock the full potential of 1-bit LLMs. Specifically, we develop a set of kernels to support fast and lossless inference of ternary BitNet b1.58 LLMs on CPUs. Extensive experiments demonstrate that bitnet.cpp achieves significant speedups, ranging from 2.37x to 6.17x on x86 CPUs and from 1.37x to 5.07x on ARM CPUs, across various model sizes. The code is available at https://github.com/microsoft/BitNet.
   Submitted 23 October, 2024; v1 submitted 21 October, 2024; originally announced October 2024.
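
   For intuition on why ternary BitNet b1.58 weights make CPU inference cheap, here is a small NumPy sketch (a toy under an assumed absmean scaling, not bitnet.cpp's actual kernels): once weights are constrained to {-1, 0, +1}, a matrix product needs only additions, subtractions and one rescale.

      import numpy as np

      def ternary_quantize(W):
          scale = np.mean(np.abs(W)) + 1e-8          # per-tensor scale (assumed absmean)
          Wq = np.clip(np.round(W / scale), -1, 1)   # entries in {-1, 0, +1}
          return Wq.astype(np.int8), scale

      def ternary_matmul(x, Wq, scale):
          # x @ Wq needs no real multiplications: add where Wq=+1, subtract where Wq=-1.
          return (x @ Wq.astype(x.dtype)) * scale

      W = np.random.randn(256, 256).astype(np.float32)
      x = np.random.randn(1, 256).astype(np.float32)
      Wq, s = ternary_quantize(W)
      y = ternary_matmul(x, Wq, s)                   # approximates x @ W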

5. arXiv:2410.15846 [pdf, other]  cs.NI; cs.LG; cs.MM
   Modelling Concurrent RTP Flows for End-to-end Predictions of QoS in Real Time Communications
   Authors: Tailai Song, Paolo Garza, Michela Meo, Maurizio Matteo Munafò
   Abstract: Real-time Transport Protocol (RTP)-based real-time communications (RTC) applications, exemplified by video conferencing, have experienced an unparalleled surge in popularity and development in recent years. In pursuit of optimizing their performance, the prediction of Quality of Service (QoS) metrics emerges as a pivotal endeavor, bolstering network monitoring and proactive solutions. However, contemporary approaches are confined to individual RTP flows and metrics, falling short in relationship capture and computational efficiency. To this end, we propose Packet-to-Prediction (P2P), a novel deep learning (DL) framework that hinges on raw packets to simultaneously process concurrent RTP flows and perform end-to-end prediction of multiple QoS metrics. Specifically, we implement a streamlined architecture, namely a length-free Transformer with cross and neighbourhood attention, capable of handling an unlimited number of RTP flows, and employ a multi-task learning paradigm to forecast four key metrics in a single shot. Our work is based on extensive traffic collected during real video calls, and conclusively, P2P outperforms comparative models in both prediction performance and temporal efficiency.
   Submitted 21 October, 2024; originally announced October 2024.

6. arXiv:2410.13592 [pdf, other]  physics.optics; cs.AI
   OAH-Net: A Deep Neural Network for Hologram Reconstruction of Off-axis Digital Holographic Microscope
   Authors: Wei Liu, Kerem Delikoyun, Qianyu Chen, Alperen Yildiz, Si Ko Myo, Win Sen Kuan, John Tshon Yit Soong, Matthew Edward Cove, Oliver Hayden, Hweekuan Lee
   Abstract: Off-axis digital holographic microscopy is a high-throughput, label-free imaging technology that provides three-dimensional, high-resolution information about samples, particularly useful in large-scale cellular imaging. However, the hologram reconstruction process poses a significant bottleneck for timely data analysis. To address this challenge, we propose a novel reconstruction approach that integrates deep learning with the physical principles of off-axis holography. We initialized part of the network weights based on the physical principle and then fine-tuned them via weakly supervised learning. Our off-axis hologram network (OAH-Net) retrieves phase and amplitude images with errors that fall within the measurement error range attributable to hardware, and its reconstruction speed significantly surpasses the microscope's acquisition rate. Crucially, OAH-Net demonstrates remarkable external generalization capabilities on unseen samples with distinct patterns and can be seamlessly integrated with other models for downstream tasks to achieve end-to-end real-time hologram analysis. This capability further expands off-axis holography's applications in both biological and medical studies.
   Submitted 17 October, 2024; originally announced October 2024.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.19976">arXiv:2409.19976</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2409.19976">pdf</a>, <a href="https://arxiv.org/format/2409.19976">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Numerical Analysis">math.NA</span> </div> </div> <p class="title is-5 mathjax"> Learning Partial Differential Equations with Deep Parallel Neural Operator </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ma%2C+Q">Qinglong Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+P">Peizhi Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+S">Sen Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+T">Tao Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.19976v2-abstract-short" style="display: inline;"> In recent years, Solving partial differential equations has shifted the focus of traditional neural network studies from finite-dimensional Euclidean spaces to generalized functional spaces in research. A novel methodology is to learn an operator as a means of approximating the mapping between outputs. Currently, researchers have proposed a variety of operator architectures. Nevertheless, the majo&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.19976v2-abstract-full').style.display = 'inline'; document.getElementById('2409.19976v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.19976v2-abstract-full" style="display: none;"> In recent years, Solving partial differential equations has shifted the focus of traditional neural network studies from finite-dimensional Euclidean spaces to generalized functional spaces in research. A novel methodology is to learn an operator as a means of approximating the mapping between outputs. Currently, researchers have proposed a variety of operator architectures. Nevertheless, the majority of these architectures adopt an iterative update architecture, whereby a single operator is learned from the same function space. In practical physical science problems, the numerical solutions of partial differential equations are complex, and a serial single operator is unable to accurately approximate the intricate mapping between input and output. So, We propose a deep parallel operator model (DPNO) for efficiently and accurately solving partial differential equations. DPNO employs convolutional neural networks to extract local features and map data into distinct latent spaces. Designing a parallel block of double Fourier neural operators to solve the iterative error problem. DPNO approximates complex mappings between inputs and outputs by learning multiple operators in different potential spaces in parallel blocks. 

7. arXiv:2409.19976 [pdf, other]  cs.LG; math.NA
   Learning Partial Differential Equations with Deep Parallel Neural Operator
   Authors: Qinglong Ma, Peizhi Zhao, Sen Wang, Tao Song
   Abstract: In recent years, solving partial differential equations has shifted the focus of traditional neural network studies from finite-dimensional Euclidean spaces to generalized functional spaces. A novel methodology is to learn an operator as a means of approximating the mapping between inputs and outputs. Currently, researchers have proposed a variety of operator architectures. Nevertheless, the majority of these architectures adopt an iterative update architecture, whereby a single operator is learned from the same function space. In practical physical science problems, the numerical solutions of partial differential equations are complex, and a serial single operator is unable to accurately approximate the intricate mapping between input and output. We therefore propose a deep parallel operator model (DPNO) for efficiently and accurately solving partial differential equations. DPNO employs convolutional neural networks to extract local features and map data into distinct latent spaces, and designs a parallel block of double Fourier neural operators to mitigate the iterative error problem. DPNO approximates complex mappings between inputs and outputs by learning multiple operators in different latent spaces within parallel blocks. DPNO achieved the best performance on five of the benchmark datasets, with an average improvement of 10.5%, and ranked second on one dataset.
   Submitted 7 November, 2024; v1 submitted 30 September, 2024; originally announced September 2024.
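
   For readers unfamiliar with Fourier neural operators, the toy NumPy sketch below shows the spectral layer such operators are built from, with two layers applied in parallel and summed; the shapes, mode count and parallel-sum merge are illustrative assumptions, not DPNO's exact block.

      import numpy as np

      def spectral_conv_1d(u, weights, modes):
          # u: (n,) real samples of a function; weights: (modes,) complex multipliers.
          U = np.fft.rfft(u)
          out = np.zeros_like(U)
          out[:modes] = U[:modes] * weights          # act only on the lowest Fourier modes
          return np.fft.irfft(out, n=u.shape[0])

      rng = np.random.default_rng(0)
      n, modes = 128, 16
      u = np.sin(np.linspace(0, 2 * np.pi, n))
      w1 = rng.standard_normal(modes) + 1j * rng.standard_normal(modes)
      w2 = rng.standard_normal(modes) + 1j * rng.standard_normal(modes)
      # "Parallel block": two spectral operators applied to the same input, then merged.
      v = spectral_conv_1d(u, w1, modes) + spectral_conv_1d(u, w2, modes)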

8. arXiv:2409.13185 [pdf, other]  cs.LG; math-ph
   ASPINN: An asymptotic strategy for solving singularly perturbed differential equations
   Authors: Sen Wang, Peizhi Zhao, Tao Song
   Abstract: Solving Singularly Perturbed Differential Equations (SPDEs) presents challenges due to the rapid change of their solutions at the boundary layer. In this manuscript, we propose Asymptotic Physics-Informed Neural Networks (ASPINN), a generalization of the Physics-Informed Neural Networks (PINN) and General-Kindred Physics-Informed Neural Networks (GKPINN) approaches. This is a decomposition method based on the idea of asymptotic analysis. Compared to PINN, the ASPINN method has a strong fitting ability for solving SPDEs due to the placement of exponential layers at the boundary layer. Unlike GKPINN, ASPINN lessens the number of fully connected layers, thereby reducing the training cost more effectively. Moreover, ASPINN theoretically approximates the solution at the boundary layer more accurately, improving on the accuracy of GKPINN as well. We demonstrate the effect of ASPINN by solving diverse classes of SPDEs, which clearly shows that the ASPINN method is promising for boundary layer problems. Furthermore, we introduce Chebyshev Kolmogorov-Arnold Networks (Chebyshev-KAN) instead of MLPs, achieving better performance in various experiments.
   Submitted 19 September, 2024; originally announced September 2024.
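
   The "exponential layers at the boundary layer" idea can be pictured as an asymptotic ansatz in which the network only has to learn slowly varying parts; the Python sketch below is one such illustrative ansatz (the stand-in sub-networks and the exact form ASPINN uses are assumptions).

      import numpy as np

      eps = 1e-4   # perturbation parameter of the SPDE

      def boundary_layer_ansatz(x, n_outer, n_layer, eps=eps):
          # u(x) ~ N_outer(x) + exp(-x / eps) * N_layer(x): the exponential factor
          # carries the steep variation near x = 0, so both sub-networks stay smooth.
          return n_outer(x) + np.exp(-x / eps) * n_layer(x)

      # Toy stand-ins for trained sub-networks.
      n_outer = lambda x: x                 # smooth outer solution
      n_layer = lambda x: -1.0 + 0.0 * x    # boundary-layer correction amplitude

      x = np.linspace(0.0, 1.0, 5)
      u = boundary_layer_ansatz(x, n_outer, n_layer)   # steep change only near x = 0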

9. arXiv:2409.10343 [pdf, other]  cs.IR; cs.AI
   Large Language Model Enhanced Hard Sample Identification for Denoising Recommendation
   Authors: Tianrui Song, Wenshuo Chao, Hao Liu
   Abstract: Implicit feedback, often used to build recommender systems, unavoidably confronts noise due to factors such as misclicks and position bias. Previous studies have attempted to alleviate this by identifying noisy samples based on their diverged patterns, such as higher loss values, and mitigating the noise through sample dropping or reweighting. Despite the progress, we observe that existing approaches struggle to distinguish hard samples from noise samples, as they often exhibit similar patterns, thereby limiting their effectiveness in denoising recommendations. To address this challenge, we propose a Large Language Model Enhanced Hard Sample Denoising (LLMHD) framework. Specifically, we construct an LLM-based scorer to evaluate the semantic consistency of items with the user preference, which is quantified based on summarized historical user interactions. The resulting scores are used to assess the hardness of samples for the pointwise or pairwise training objectives. To ensure efficiency, we introduce a variance-based sample pruning strategy to filter potential hard samples before scoring. In addition, we propose an iterative preference update module designed to continuously refine the summarized user preference, which may be biased due to false-positive user-item interactions. Extensive experiments on three real-world datasets and four backbone recommenders demonstrate the effectiveness of our approach.
   Submitted 16 September, 2024; originally announced September 2024.
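
   The variance-based pruning step mentioned in the abstract can be illustrated with a few lines of NumPy (the loss-history shape, top-ratio threshold and selection rule are assumptions for illustration, not LLMHD's exact criterion): samples whose training losses fluctuate most across epochs are kept as candidate hard samples for LLM scoring.

      import numpy as np

      def candidate_hard_samples(loss_history, top_ratio=0.1):
          # loss_history: (num_samples, num_epochs) per-sample training losses.
          variances = loss_history.var(axis=1)
          k = max(1, int(len(variances) * top_ratio))
          return np.argsort(variances)[-k:]          # indices with the highest variance

      losses = np.abs(np.random.randn(1000, 10))     # fake per-epoch losses
      hard_candidates = candidate_hard_samples(losses, top_ratio=0.05)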

10. arXiv:2409.00926 [pdf, other]  cs.CV
   Towards Student Actions in Classroom Scenes: New Dataset and Baseline
   Authors: Zhuolin Tan, Chenqiang Gao, Anyong Qin, Ruixin Chen, Tiecheng Song, Feng Yang, Deyu Meng
   Abstract: Analyzing student actions is an important and challenging task in educational research. Existing efforts have been hampered by the lack of accessible datasets to capture the nuanced action dynamics in classrooms. In this paper, we present a new multi-label student action video (SAV) dataset for complex classroom scenes. The dataset consists of 4,324 carefully trimmed video clips from 758 different classrooms, each labeled with 15 different actions displayed by students in classrooms. Compared to existing behavioral datasets, our dataset stands out by providing a wide range of real classroom scenarios, high-quality video data, and unique challenges, including subtle movement differences, dense object engagement, significant scale differences, varied shooting angles, and visual occlusion. The increased complexity of the dataset brings new opportunities and challenges for benchmarking action detection. Innovatively, we also propose a new baseline method, a visual transformer for enhancing attention to key local details in small and dense object regions. Our method achieves excellent performance with mean Average Precision (mAP) of 67.9% and 27.4% on SAV and AVA, respectively. This paper not only provides the dataset but also calls for further research into AI-driven educational tools that may transform teaching methodologies and learning outcomes. The code and dataset will be released at https://github.com/Ritatanz/SAV.
   Submitted 1 September, 2024; originally announced September 2024.

11. arXiv:2408.14734 [pdf]  cs.LG; math-ph; math.NA
   General-Kindred Physics-Informed Neural Network to the Solutions of Singularly Perturbed Differential Equations
   Authors: Sen Wang, Peizhi Zhao, Qinglong Ma, Tao Song
   Abstract: Physics-Informed Neural Networks (PINNs) have become a promising research direction in the field of solving Partial Differential Equations (PDEs). Dealing with singular perturbation problems continues to be a difficult challenge for PINNs. The solution of singular perturbation problems often exhibits sharp boundary layers and steep gradients, and traditional PINN cannot achieve approximation of boundary layers. In this manuscript, we propose the General-Kindred Physics-Informed Neural Network (GKPINN) for solving Singular Perturbation Differential Equations (SPDEs). This approach utilizes asymptotic analysis to acquire prior knowledge of the boundary layer from the equation and establishes a novel network to assist PINN in approximating the boundary layer. It is compared with traditional PINN by solving examples of one-dimensional, two-dimensional, and time-varying SPDEs. The research findings underscore the exceptional performance of our novel approach, GKPINN, which delivers a remarkable enhancement in reducing the $L_2$ error by two to four orders of magnitude compared to the established PINN methodology. This significant improvement is accompanied by a substantial acceleration in convergence rates, without compromising the high precision that is critical for our applications. Furthermore, GKPINN still performs well in extreme cases with perturbation parameters of $1\times10^{-38}$, demonstrating its excellent generalization ability.
   Submitted 26 August, 2024; originally announced August 2024.
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.13378">arXiv:2408.13378</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.13378">pdf</a>, <a href="https://arxiv.org/format/2408.13378">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> DrugAgent: Explainable Drug Repurposing Agent with Large Language Model-based Reasoning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Inoue%2C+Y">Yoshitaka Inoue</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+T">Tianci Song</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+T">Tianfan Fu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.13378v3-abstract-short" style="display: inline;"> Drug repurposing offers a promising avenue for accelerating drug development by identifying new therapeutic potentials of existing drugs. In this paper, we propose a multi-agent framework to enhance the drug repurposing process using state-of-the-art machine learning techniques and knowledge integration. Our framework comprises several specialized agents: an AI Agent trains robust drug-target inte&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.13378v3-abstract-full').style.display = 'inline'; document.getElementById('2408.13378v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.13378v3-abstract-full" style="display: none;"> Drug repurposing offers a promising avenue for accelerating drug development by identifying new therapeutic potentials of existing drugs. In this paper, we propose a multi-agent framework to enhance the drug repurposing process using state-of-the-art machine learning techniques and knowledge integration. Our framework comprises several specialized agents: an AI Agent trains robust drug-target interaction (DTI) models; a Knowledge Graph Agent utilizes the drug-gene interaction database (DGIdb), DrugBank, Comparative Toxicogenomics Database (CTD), and Search Tool for Interactions of Chemicals (STITCH) to systematically extract DTIs; and a Search Agent interacts with biomedical literature to annotate and verify computational predictions. By integrating outputs from these agents, our system effectively harnesses diverse data sources, including external databases, to propose viable repurposing candidates. Preliminary results demonstrate the potential of our approach in not only predicting drug-disease interactions but also in reducing the time and cost associated with traditional drug discovery methods. 
This paper highlights the scalability of multi-agent systems in biomedical research and their role in driving innovation in drug repurposing. Our approach not only outperforms existing methods in predicting drug repurposing potential but also provides interpretable results, paving the way for more efficient and cost-effective drug discovery processes.
Submitted 16 September, 2024; v1 submitted 23 August, 2024; originally announced August 2024.
Comments: 18 pages, 1 figure

arXiv:2408.07096 (https://arxiv.org/abs/2408.07096) [pdf, other]
DOI: 10.14778/3685800.3685900 (https://doi.org/10.14778/3685800.3685900)
Subjects: Distributed, Parallel, and Cluster Computing (cs.DC)
Title: OFL-W3: A One-shot Federated Learning System on Web 3.0
Authors: Linshan Jiang, Moming Duan, Bingsheng He, Yulin Sun, Peishen Yan, Yang Hua, Tao Song
Abstract: Federated Learning (FL) addresses the challenges posed by data silos, which arise from privacy, security regulations, and ownership concerns. Despite these barriers, FL enables these isolated data repositories to participate in collaborative learning without compromising privacy or security. Concurrently, the advancement of blockchain technology and decentralized applications (DApps) within Web 3.0 heralds a new era of transformative possibilities in web development. As such, incorporating FL into Web 3.0 paves the way for overcoming the limitations of data silos through collaborative learning. However, given the transaction speed constraints of core blockchains such as Ethereum (ETH) and the latency in smart contracts, employing one-shot FL, which minimizes client-server interactions in traditional FL to a single exchange, is considered more apt for Web 3.0 environments. This paper presents a practical one-shot FL system for Web 3.0, termed OFL-W3. OFL-W3 capitalizes on blockchain technology by utilizing smart contracts for managing transactions. Meanwhile, OFL-W3 utilizes the Inter-Planetary File System (IPFS) coupled with Flask communication to facilitate backend server operations and to use existing one-shot FL algorithms. With the integration of the incentive mechanism, OFL-W3 showcases an effective implementation of one-shot FL on Web 3.0, offering valuable insights and future directions for AI combined with Web 3.0 studies.
Submitted 12 August, 2024; originally announced August 2024.
Comments: VLDB 24 demo paper
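
The listing does not include OFL-W3's smart-contract or IPFS plumbing, so the following is only a minimal sketch of the one-shot FL pattern the abstract refers to: each client trains locally exactly once, uploads its weights, and the server aggregates them in a single round (here by plain parameter averaging; the function names and the averaging rule are illustrative assumptions, not the system's actual algorithm).

    import copy
    import torch
    import torch.nn as nn

    def local_train(global_model, loader, epochs=1, lr=0.01):
        # The only training a client performs in a one-shot setting, done entirely offline.
        model = copy.deepcopy(global_model)
        opt = torch.optim.SGD(model.parameters(), lr=lr)
        loss_fn = nn.CrossEntropyLoss()
        for _ in range(epochs):
            for x, y in loader:
                opt.zero_grad()
                loss_fn(model(x), y).backward()
                opt.step()
        return model.state_dict()                 # the single upload per client

    def one_shot_aggregate(global_model, client_states):
        # Single server-side exchange: average the uploaded parameters once and publish the result.
        averaged = {k: torch.stack([s[k].float() for s in client_states]).mean(dim=0)
                    for k in client_states[0]}
        global_model.load_state_dict(averaged)
        return global_model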
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">VLDB 24 demo paper</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.15389">arXiv:2407.15389</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.15389">pdf</a>, <a href="https://arxiv.org/format/2407.15389">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> Poisoning with A Pill: Circumventing Detection in Federated Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Guo%2C+H">Hanxi Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+H">Hao Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+T">Tao Song</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+T">Tianhang Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Hua%2C+Y">Yang Hua</a>, <a href="/search/cs?searchtype=author&amp;query=Guan%2C+H">Haibing Guan</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+X">Xiangyu Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.15389v1-abstract-short" style="display: inline;"> Without direct access to the client&#39;s data, federated learning (FL) is well-known for its unique strength in data privacy protection among existing distributed machine learning techniques. However, its distributive and iterative nature makes FL inherently vulnerable to various poisoning attacks. To counteract these threats, extensive defenses have been proposed to filter out malicious clients, usi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.15389v1-abstract-full').style.display = 'inline'; document.getElementById('2407.15389v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.15389v1-abstract-full" style="display: none;"> Without direct access to the client&#39;s data, federated learning (FL) is well-known for its unique strength in data privacy protection among existing distributed machine learning techniques. However, its distributive and iterative nature makes FL inherently vulnerable to various poisoning attacks. To counteract these threats, extensive defenses have been proposed to filter out malicious clients, using various detection metrics. Based on our analysis of existing attacks and defenses, we find that there is a lack of attention to model redundancy. In neural networks, various model parameters contribute differently to the model&#39;s performance. However, existing attacks in FL manipulate all the model update parameters with the same strategy, making them easily detectable by common defenses. Meanwhile, the defenses also tend to analyze the overall statistical features of the entire model updates, leaving room for sophisticated attacks. 
Based on these observations, this paper proposes a generic and attack-agnostic augmentation approach designed to enhance the effectiveness and stealthiness of existing FL poisoning attacks against detection in FL, pointing out the inherent flaws of existing defenses and exposing the necessity of fine-grained FL security. Specifically, we employ a three-stage methodology that strategically constructs, generates, and injects poison (generated by existing attacks) into a pill (a tiny subnet with a novel structure) during FL training, with the stages named pill construction, pill poisoning, and pill injection, respectively. Extensive experimental results show that FL poisoning attacks enhanced by our method can bypass all the popular defenses, and can gain an up to 7x error rate increase, as well as on average a more than 2x error rate increase, on both IID and non-IID data, in both cross-silo and cross-device FL systems.
Submitted 22 July, 2024; originally announced July 2024.

arXiv:2407.01614 (https://arxiv.org/abs/2407.01614) [pdf, other]
Subjects: Machine Learning (cs.LG); Artificial Intelligence (cs.AI)
Title: Enhancing Stability for Large Language Models Training in Constrained Bandwidth Networks
Authors: Yun Dai, Tejas Dharamsi, Byron Hsu, Tao Song, Hamed Firooz
Abstract: Training extremely large language models (LLMs) with billions of parameters is a computationally intensive task that pushes the limits of current data parallel training systems. While techniques like ZeRO++ have enabled efficient distributed training of such giant models on inexpensive low-bandwidth clusters, they can suffer from convergence issues due to potential race conditions in the hierarchical partitioning (hpZ) scheme employed to reduce cross-machine communication. In this work, we first show how these race conditions cause instability when training models with billions of parameters. We then propose a modification to the partitioning algorithm that addresses these convergence challenges while maintaining competitive training efficiency. Empirical evaluation on training the multi-billion-parameter Falcon and Llama-2 models demonstrates the updated algorithm's ability to achieve reliable convergence on these massive models, where stock ZeRO++ hpZ fails to converge. The updated algorithm enables robust training of larger models with a 98% improvement in throughput and model training speed, without sacrificing the quality of convergence.
Submitted 5 October, 2024; v1 submitted 27 June, 2024; originally announced July 2024.

arXiv:2406.07572 (https://arxiv.org/abs/2406.07572) [pdf, ps, other]
Subjects: Artificial Intelligence (cs.AI); Computational Engineering, Finance, and Science (cs.CE); Machine Learning (cs.LG)
Title: Domain-specific ReAct for physics-integrated iterative modeling: A case study of LLM agents for gas path analysis of gas turbines
Authors: Tao Song, Yuwei Fan, Chenlong Feng, Keyu Song, Chao Liu, Dongxiang Jiang
Abstract: This study explores the application of large language models (LLMs) with callable tools in the energy and power engineering domain, focusing on gas path analysis of gas turbines. We developed a dual-agent tool-calling process to integrate expert knowledge, predefined tools, and LLM reasoning. We evaluated various LLMs, including LLama3, Qwen1.5 and GPT. Smaller models struggled with tool usage and parameter extraction, while larger models demonstrated favorable capabilities. All models faced challenges with complex, multi-component problems. Based on the test results, we infer that LLMs with nearly 100 billion parameters could meet professional scenario requirements with fine-tuning and advanced prompt design. Continued development is likely to enhance their accuracy and effectiveness, paving the way for more robust AI-driven solutions.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.07572v1-abstract-full').style.display = 'none'; document.getElementById('2406.07572v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.05472">arXiv:2406.05472</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2406.05472">pdf</a>, <a href="https://arxiv.org/format/2406.05472">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> A Novel Generative AI-Based Framework for Anomaly Detection in Multicast Messages in Smart Grid Communications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zaboli%2C+A">Aydin Zaboli</a>, <a href="/search/cs?searchtype=author&amp;query=Choi%2C+S+L">Seong Lok Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+T">Tai-Jin Song</a>, <a href="/search/cs?searchtype=author&amp;query=Hong%2C+J">Junho Hong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.05472v1-abstract-short" style="display: inline;"> Cybersecurity breaches in digital substations can pose significant challenges to the stability and reliability of power system operations. To address these challenges, defense and mitigation techniques are required. Identifying and detecting anomalies in information and communication technology (ICT) is crucial to ensure secure device interactions within digital substations. This paper proposes a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.05472v1-abstract-full').style.display = 'inline'; document.getElementById('2406.05472v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.05472v1-abstract-full" style="display: none;"> Cybersecurity breaches in digital substations can pose significant challenges to the stability and reliability of power system operations. To address these challenges, defense and mitigation techniques are required. Identifying and detecting anomalies in information and communication technology (ICT) is crucial to ensure secure device interactions within digital substations. This paper proposes a task-oriented dialogue (ToD) system for anomaly detection (AD) in datasets of multicast messages e.g., generic object oriented substation event (GOOSE) and sampled value (SV) in digital substations using large language models (LLMs). This model has a lower potential error and better scalability and adaptability than a process that considers the cybersecurity guidelines recommended by humans, known as the human-in-the-loop (HITL) process. 

arXiv:2406.05472 (https://arxiv.org/abs/2406.05472) [pdf, other]
Subjects: Cryptography and Security (cs.CR); Systems and Control (eess.SY)
Title: A Novel Generative AI-Based Framework for Anomaly Detection in Multicast Messages in Smart Grid Communications
Authors: Aydin Zaboli, Seong Lok Choi, Tai-Jin Song, Junho Hong
Abstract: Cybersecurity breaches in digital substations can pose significant challenges to the stability and reliability of power system operations. To address these challenges, defense and mitigation techniques are required. Identifying and detecting anomalies in information and communication technology (ICT) is crucial to ensure secure device interactions within digital substations. This paper proposes a task-oriented dialogue (ToD) system for anomaly detection (AD) in datasets of multicast messages, e.g., generic object oriented substation event (GOOSE) and sampled value (SV) messages, in digital substations using large language models (LLMs). This model has a lower potential error and better scalability and adaptability than a process that considers the cybersecurity guidelines recommended by humans, known as the human-in-the-loop (HITL) process. Also, this methodology significantly reduces the effort required when addressing new cyber threats or anomalies compared with machine learning (ML) techniques, since it leaves the model's complexity and precision unaffected and offers a faster implementation. These findings present a comparative assessment, conducted utilizing standard and advanced performance evaluation metrics, of the proposed AD framework and the HITL process. To generate and extract datasets of IEC 61850 communications, a hardware-in-the-loop (HIL) testbed was employed.
Submitted 8 June, 2024; originally announced June 2024.
Comments: 10 pages, 10 figures, Submitted to IEEE Transactions on Information Forensics and Security

arXiv:2405.19931 (https://arxiv.org/abs/2405.19931) [pdf, other]
Subjects: Computer Vision and Pattern Recognition (cs.CV); Artificial Intelligence (cs.AI); Machine Learning (cs.LG)
Title: Exploring Diffusion Models' Corruption Stage in Few-Shot Fine-tuning and Mitigating with Bayesian Neural Networks
Authors: Xiaoyu Wu, Jiaru Zhang, Yang Hua, Bohan Lyu, Hao Wang, Tao Song, Haibing Guan
Abstract: Few-shot fine-tuning of Diffusion Models (DMs) is a key advancement, significantly reducing training costs and enabling personalized AI applications. However, we explore the training dynamics of DMs and observe an unanticipated phenomenon: during the training process, image fidelity initially improves, then unexpectedly deteriorates with the emergence of noisy patterns, only to recover later with severe overfitting. We term the stage with generated noisy patterns the corruption stage. To understand this corruption stage, we begin by theoretically modeling the one-shot fine-tuning scenario, and then extend this modeling to more general cases. Through this modeling, we identify the primary cause of this corruption stage: a narrowed learning distribution inherent in the nature of few-shot fine-tuning. To tackle this, we apply Bayesian Neural Networks (BNNs) on DMs with variational inference to implicitly broaden the learned distribution, and show that the learning target of the BNNs can be naturally regarded as an expectation of the diffusion loss and a further regularization with the pretrained DMs. This approach is highly compatible with current few-shot fine-tuning methods in DMs and does not introduce any extra inference costs. Experimental results demonstrate that our method significantly mitigates corruption, and improves the fidelity, quality and diversity of the generated images in both object-driven and subject-driven generation tasks.
Submitted 30 May, 2024; originally announced May 2024.
Comments: Preprint. Under review
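
The abstract's "expectation of the diffusion loss plus a regularization with the pretrained model" can be illustrated with a generic Bayes-by-backprop-style layer; the following PyTorch sketch is an assumption-laden stand-in (the layer type, prior width, and KL weight are all illustrative), not the authors' implementation.

    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    class VariationalLinear(nn.Module):
        """Gaussian posterior over the weights of a pretrained linear layer."""
        def __init__(self, pretrained: nn.Linear):
            super().__init__()
            self.mu = nn.Parameter(pretrained.weight.detach().clone())
            self.log_sigma = nn.Parameter(torch.full_like(pretrained.weight, -5.0))
            self.bias = nn.Parameter(pretrained.bias.detach().clone())
            self.register_buffer("prior_mu", pretrained.weight.detach().clone())

        def forward(self, x):
            # Reparameterized weight sample: each forward pass uses slightly different weights,
            # so the training objective becomes an expectation of the diffusion loss.
            w = self.mu + self.log_sigma.exp() * torch.randn_like(self.mu)
            return F.linear(x, w, self.bias)

        def kl_to_pretrained(self, prior_sigma: float = 0.1):
            # KL(q(w) || N(pretrained weights, prior_sigma^2)): regularizes toward the pretrained DM.
            var = (2.0 * self.log_sigma).exp()
            return (((self.mu - self.prior_mu) ** 2 + var) / (2.0 * prior_sigma ** 2)
                    - self.log_sigma + torch.log(torch.tensor(prior_sigma)) - 0.5).sum()

    def fine_tune_loss(eps_pred, eps_true, kl_term, beta=1e-6):
        # Diffusion noise-prediction loss under sampled weights plus the weighted KL regularizer.
        return F.mse_loss(eps_pred, eps_true) + beta * kl_term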

arXiv:2405.18784 (https://arxiv.org/abs/2405.18784) [pdf, other]
Subjects: Computer Vision and Pattern Recognition (cs.CV)
Title: LP-3DGS: Learning to Prune 3D Gaussian Splatting
Authors: Zhaoliang Zhang, Tianchen Song, Yongjae Lee, Li Yang, Cheng Peng, Rama Chellappa, Deliang Fan
Abstract: Recently, 3D Gaussian Splatting (3DGS) has become one of the mainstream methodologies for novel view synthesis (NVS) due to its high quality and fast rendering speed. However, as a point-based scene representation, 3DGS potentially generates a large number of Gaussians to fit the scene, leading to high memory usage. Improvements that have been proposed require either an empirical and preset pruning ratio or an importance score threshold to prune the point cloud. Such a hyperparameter requires multiple rounds of training to optimize and achieve the maximum pruning ratio while maintaining the rendering quality for each scene. In this work, we propose learning-to-prune 3DGS (LP-3DGS), where a trainable binary mask applied to the importance score finds the optimal pruning ratio automatically. Instead of using the traditional straight-through estimator (STE) method to approximate the binary mask gradient, we redesign the masking function to leverage the Gumbel-Sigmoid method, making it differentiable and compatible with the existing training process of 3DGS. Extensive experiments have shown that LP-3DGS consistently produces a good balance that is both efficient and high quality.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.18784v1-abstract-full').style.display = 'none'; document.getElementById('2405.18784v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.09200">arXiv:2405.09200</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.09200">pdf</a>, <a href="https://arxiv.org/ps/2405.09200">ps</a>, <a href="https://arxiv.org/format/2405.09200">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Performance Analysis of RIS-aided MISO Systems with EMI and Channel Aging </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Song%2C+T">Taoyu Song</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+E">Enyu Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+Y">Yu Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+Y">Yiyang Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+J">Jiayi Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Ai%2C+B">Bo Ai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.09200v1-abstract-short" style="display: inline;"> In this paper, we investigate a reconfigurable intelligent surface (RIS)-aided multiple-input single-output (MISO) system in the presence of electromagnetic interference (EMI) and channel aging with a Rician fading channel model between the base station (BS) and user equipment (UE). Specifically, we derive the closed-form expression for downlink spectral efficiency (SE) with maximum ratio transmis&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.09200v1-abstract-full').style.display = 'inline'; document.getElementById('2405.09200v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.09200v1-abstract-full" style="display: none;"> In this paper, we investigate a reconfigurable intelligent surface (RIS)-aided multiple-input single-output (MISO) system in the presence of electromagnetic interference (EMI) and channel aging with a Rician fading channel model between the base station (BS) and user equipment (UE). Specifically, we derive the closed-form expression for downlink spectral efficiency (SE) with maximum ratio transmission (MRT) precoding. The Monte-Carlo simulation supports the theoretical results, demonstrating that amplifying the weight of the line-of-sight (LoS) component in Rician fading channels can boost SE, while EMI has a detrimental impact. Furthermore, continuously increasing the number of RIS elements is not an optimal choice when EMI exists. Nonetheless, RIS can be deployed to compensate for SE degradation caused by channel aging effects. Finally, enlarging the RIS elements size can significantly improve system performance. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.09200v1-abstract-full').style.display = 'none'; document.getElementById('2405.09200v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.06277">arXiv:2405.06277</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.06277">pdf</a>, <a href="https://arxiv.org/format/2405.06277">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Learning A Spiking Neural Network for Efficient Image Deraining </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Song%2C+T">Tianyu Song</a>, <a href="/search/cs?searchtype=author&amp;query=Jin%2C+G">Guiyue Jin</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+P">Pengpeng Li</a>, <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+K">Kui Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+X">Xiang Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Jin%2C+J">Jiyu Jin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.06277v1-abstract-short" style="display: inline;"> Recently, spiking neural networks (SNNs) have demonstrated substantial potential in computer vision tasks. In this paper, we present an Efficient Spiking Deraining Network, called ESDNet. Our work is motivated by the observation that rain pixel values will lead to a more pronounced intensity of spike signals in SNNs. However, directly applying deep SNNs to image deraining task still remains a sign&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.06277v1-abstract-full').style.display = 'inline'; document.getElementById('2405.06277v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.06277v1-abstract-full" style="display: none;"> Recently, spiking neural networks (SNNs) have demonstrated substantial potential in computer vision tasks. In this paper, we present an Efficient Spiking Deraining Network, called ESDNet. Our work is motivated by the observation that rain pixel values will lead to a more pronounced intensity of spike signals in SNNs. However, directly applying deep SNNs to image deraining task still remains a significant challenge. This is attributed to the information loss and training difficulties that arise from discrete binary activation and complex spatio-temporal dynamics. To this end, we develop a spiking residual block to convert the input into spike signals, then adaptively optimize the membrane potential by introducing attention weights to adjust spike responses in a data-driven manner, alleviating information loss caused by discrete binary activation. By this way, our ESDNet can effectively detect and analyze the characteristics of rain streaks by learning their fluctuations. 

arXiv:2405.06277 (https://arxiv.org/abs/2405.06277) [pdf, other]
Subjects: Computer Vision and Pattern Recognition (cs.CV)
Title: Learning A Spiking Neural Network for Efficient Image Deraining
Authors: Tianyu Song, Guiyue Jin, Pengpeng Li, Kui Jiang, Xiang Chen, Jiyu Jin
Abstract: Recently, spiking neural networks (SNNs) have demonstrated substantial potential in computer vision tasks. In this paper, we present an Efficient Spiking Deraining Network, called ESDNet. Our work is motivated by the observation that rain pixel values will lead to a more pronounced intensity of spike signals in SNNs. However, directly applying deep SNNs to the image deraining task still remains a significant challenge. This is attributed to the information loss and training difficulties that arise from discrete binary activation and complex spatio-temporal dynamics. To this end, we develop a spiking residual block to convert the input into spike signals, then adaptively optimize the membrane potential by introducing attention weights to adjust spike responses in a data-driven manner, alleviating information loss caused by discrete binary activation. In this way, our ESDNet can effectively detect and analyze the characteristics of rain streaks by learning their fluctuations. This also enables better guidance for the deraining process and facilitates high-quality image reconstruction. Instead of relying on the ANN-SNN conversion strategy, we introduce a gradient proxy strategy to directly train the model, overcoming the challenge of training. Experimental results show that our approach gains comparable performance against ANN-based methods while reducing energy consumption by 54%. The source code is available at https://github.com/MingTian99/ESDNet.
Submitted 10 May, 2024; originally announced May 2024.
Comments: Accepted by IJCAI2024
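
The "gradient proxy" (surrogate gradient) idea used to train the spiking network directly can be shown with a generic leaky integrate-and-fire step; the surrogate shape and the neuron constants below are common defaults, assumed here for illustration rather than taken from ESDNet.

    import torch

    class SurrogateSpike(torch.autograd.Function):
        """Heaviside spike in the forward pass; smooth proxy gradient in the backward pass."""
        @staticmethod
        def forward(ctx, v_minus_thresh):
            ctx.save_for_backward(v_minus_thresh)
            return (v_minus_thresh > 0).float()

        @staticmethod
        def backward(ctx, grad_output):
            (x,) = ctx.saved_tensors
            # Derivative of a fast sigmoid, a common surrogate for the non-differentiable spike.
            return grad_output / (1.0 + 10.0 * x.abs()) ** 2

    def lif_forward(inputs, tau=0.5, v_thresh=1.0):
        # inputs: (T, batch, features) input current per time step.
        v = torch.zeros_like(inputs[0])
        spikes = []
        for x_t in inputs:
            v = tau * v + x_t                          # leaky integration of the membrane potential
            s = SurrogateSpike.apply(v - v_thresh)     # binary spike, differentiable via the proxy
            v = v - s * v_thresh                       # soft reset after a spike
            spikes.append(s)
        return torch.stack(spikes)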

arXiv:2404.19192 (https://arxiv.org/abs/2404.19192) [pdf, other]
Subjects: Computation and Language (cs.CL); Artificial Intelligence (cs.AI)
Title: Mix of Experts Language Model for Named Entity Recognition
Authors: Xinwei Chen, Kun Li, Tianyou Song, Jiangjian Guo
Abstract: Named Entity Recognition (NER) is an essential steppingstone in the field of natural language processing. Although promising performance has been achieved by various distantly supervised models, we argue that distant supervision inevitably introduces incomplete and noisy annotations, which may mislead the model training process. To address this issue, we propose a robust NER model named BOND-MoE based on Mixture of Experts (MoE). Instead of relying on a single model for NER prediction, multiple models are trained and ensembled under the Expectation-Maximization (EM) framework, so that noisy supervision can be dramatically alleviated. In addition, we introduce a fair assignment module to balance the document-model assignment process. Extensive experiments on real-world datasets show that the proposed method achieves state-of-the-art performance compared with other distantly supervised NER methods.
Submitted 29 April, 2024; originally announced April 2024.

arXiv:2404.14617 (https://arxiv.org/abs/2404.14617) [pdf, other]
Subjects: Hardware Architecture (cs.AR)
Title: TDRAM: Tag-enhanced DRAM for Efficient Caching
Authors: Maryam Babaie, Ayaz Akram, Wendy Elsasser, Brent Haukness, Michael Miller, Taeksang Song, Thomas Vogelsang, Steven Woo, Jason Lowe-Power
Abstract: As SRAM-based caches are hitting a scaling wall, manufacturers are integrating DRAM-based caches into system designs to continue increasing cache sizes.
While DRAM caches can improve the performance of memory systems, existing DRAM cache designs suffer from high miss penalties, wasted data movement, and interference between misses and demand requests. In this paper, we propose TDRAM, a novel DRAM microarchitecture tailored for caching. TDRAM enhances HBM3 by adding a set of small low-latency mats to store tags and metadata on the same die as the data mats. These mats enable fast parallel tag and data access, on-DRAM-die tag comparison, and conditional data response based on the comparison result (reducing wasted data transfers), akin to the mechanism of SRAM caches. TDRAM further optimizes the hit and miss latencies by performing opportunistic early tag probing. Moreover, TDRAM introduces a flush buffer to store conflicting dirty data on write misses, eliminating turnaround delays on the data bus. We evaluate TDRAM using a full-system simulator and a set of HPC workloads with large memory footprints, showing that TDRAM provides at least 2.6$\times$ faster tag check, 1.2$\times$ speedup, and 21% less energy consumption compared to state-of-the-art commercial and research designs.
Submitted 22 April, 2024; originally announced April 2024.
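
None of TDRAM's microarchitectural details beyond the abstract are available in this listing, so the following is only a behavioral toy model of the two ideas the abstract names: an on-die tag check that returns data only on a hit, and a flush buffer that parks conflicting dirty lines on write misses. The set count, block size, and all names are illustrative assumptions.

    from dataclasses import dataclass, field

    BLOCK = 64            # cache-line size in bytes (illustrative)
    SETS = 1 << 20        # number of direct-mapped sets (illustrative)

    @dataclass
    class Line:
        tag: int = -1
        dirty: bool = False
        data: bytes = b"\x00" * BLOCK

    @dataclass
    class ToyDramCache:
        lines: dict = field(default_factory=dict)       # set index -> Line
        flush_buffer: list = field(default_factory=list)

        def _split(self, addr: int):
            block = addr // BLOCK
            return block % SETS, block // SETS          # (set index, tag)

        def read(self, addr: int):
            idx, tag = self._split(addr)
            line = self.lines.get(idx, Line())
            if line.tag == tag:
                return "hit", line.data                 # data is sent only when the on-die tag matches
            return "miss", None                         # on a miss, no data is moved to the controller

        def fill_on_write_miss(self, addr: int, new_data: bytes):
            idx, tag = self._split(addr)
            victim = self.lines.get(idx)
            if victim is not None and victim.dirty and victim.tag != tag:
                self.flush_buffer.append((idx, victim)) # park the conflicting dirty line off the data bus
            self.lines[idx] = Line(tag=tag, dirty=True, data=new_data)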

arXiv:2404.13903 (https://arxiv.org/abs/2404.13903) [pdf, other]
Subjects: Computer Vision and Pattern Recognition (cs.CV)
Title: Accelerating Image Generation with Sub-path Linear Approximation Model
Authors: Chen Xu, Tianhui Song, Weixin Feng, Xubin Li, Tiezheng Ge, Bo Zheng, Limin Wang
Abstract: Diffusion models have significantly advanced the state of the art in image, audio, and video generation tasks. However, their applications in practical scenarios are hindered by slow inference speed. Drawing inspiration from the approximation strategies utilized in consistency models, we propose the Sub-path Linear Approximation Model (SLAM), which accelerates diffusion models while maintaining high-quality image generation. SLAM treats the PF-ODE trajectory as a series of PF-ODE sub-paths divided by sampled points, and harnesses sub-path linear (SL) ODEs to form a progressive and continuous error estimation along each individual PF-ODE sub-path. The optimization on such SL-ODEs allows SLAM to construct denoising mappings with smaller cumulative approximated errors. An efficient distillation method is also developed to facilitate the incorporation of more advanced diffusion models, such as latent diffusion models. Our extensive experimental results demonstrate that SLAM achieves an efficient training regimen, requiring only 6 A100 GPU days to produce a high-quality generative model capable of 2 to 4-step generation with high performance. Comprehensive evaluations on LAION, MS COCO 2014, and MS COCO 2017 datasets also illustrate that SLAM surpasses existing acceleration methods in few-step generation tasks, achieving state-of-the-art performance both on FID and the quality of the generated images.
Submitted 21 July, 2024; v1 submitted 22 April, 2024; originally announced April 2024.

arXiv:2404.09405 (https://arxiv.org/abs/2404.09405) [pdf, other]
Subjects: Computation and Language (cs.CL); Artificial Intelligence (cs.AI)
Title: Few-shot Name Entity Recognition on StackOverflow
Authors: Xinwei Chen, Kun Li, Tianyou Song, Jiangjian Guo
Abstract: StackOverflow, with its vast question repository and limited labeled examples, raises an annotation challenge for us. We address this gap by proposing RoBERTa+MAML, a few-shot named entity recognition (NER) method leveraging meta-learning. Our approach, evaluated on the StackOverflow NER corpus (27 entity types), achieves a 5% F1 score improvement over the baseline. We further improved the results with domain-specific phrase processing.
Submitted 27 April, 2024; v1 submitted 14 April, 2024; originally announced April 2024.
Comments: 5 pages
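
MAML itself is a generic meta-learning recipe, so a first-order sketch of the inner/outer loop can be given without access to the paper's configuration; the inner learning rate, step counts, and the idea of plugging a RoBERTa token classifier in as `model` are all assumptions for illustration.

    import copy
    import torch
    import torch.nn as nn

    def inner_adapt(model, support_x, support_y, lr=1e-2, steps=3):
        # Task-specific adaptation on the small support set.
        adapted = copy.deepcopy(model)
        opt = torch.optim.SGD(adapted.parameters(), lr=lr)
        loss_fn = nn.CrossEntropyLoss()
        for _ in range(steps):
            opt.zero_grad()
            loss_fn(adapted(support_x), support_y).backward()
            opt.step()
        return adapted

    def fomaml_step(model, meta_opt, tasks):
        # First-order MAML: gradients of the query loss w.r.t. the adapted weights are
        # applied directly to the meta-parameters (the second-order term is dropped).
        loss_fn = nn.CrossEntropyLoss()
        meta_opt.zero_grad()
        for support_x, support_y, query_x, query_y in tasks:
            adapted = inner_adapt(model, support_x, support_y)
            query_loss = loss_fn(adapted(query_x), query_y)
            grads = torch.autograd.grad(query_loss, list(adapted.parameters()))
            for p, g in zip(model.parameters(), grads):
                p.grad = g if p.grad is None else p.grad + g
        meta_opt.step()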
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.01230">arXiv:2404.01230</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2404.01230">pdf</a>, <a href="https://arxiv.org/format/2404.01230">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> LLM as a Mastermind: A Survey of Strategic Reasoning with Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yadong Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Mao%2C+S">Shaoguang Mao</a>, <a href="/search/cs?searchtype=author&amp;query=Ge%2C+T">Tao Ge</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+X">Xun Wang</a>, <a href="/search/cs?searchtype=author&amp;query=de+Wynter%2C+A">Adrian de Wynter</a>, <a href="/search/cs?searchtype=author&amp;query=Xia%2C+Y">Yan Xia</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+W">Wenshan Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+T">Ting Song</a>, <a href="/search/cs?searchtype=author&amp;query=Lan%2C+M">Man Lan</a>, <a href="/search/cs?searchtype=author&amp;query=Wei%2C+F">Furu Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.01230v1-abstract-short" style="display: inline;"> This paper presents a comprehensive survey of the current status and opportunities for Large Language Models (LLMs) in strategic reasoning, a sophisticated form of reasoning that necessitates understanding and predicting adversary actions in multi-agent settings while adjusting strategies accordingly. Strategic reasoning is distinguished by its focus on the dynamic and uncertain nature of interact&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.01230v1-abstract-full').style.display = 'inline'; document.getElementById('2404.01230v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.01230v1-abstract-full" style="display: none;"> This paper presents a comprehensive survey of the current status and opportunities for Large Language Models (LLMs) in strategic reasoning, a sophisticated form of reasoning that necessitates understanding and predicting adversary actions in multi-agent settings while adjusting strategies accordingly. Strategic reasoning is distinguished by its focus on the dynamic and uncertain nature of interactions among multi-agents, where comprehending the environment and anticipating the behavior of others is crucial. We explore the scopes, applications, methodologies, and evaluation metrics related to strategic reasoning with LLMs, highlighting the burgeoning development in this area and the interdisciplinary approaches enhancing their decision-making performance. 
The survey aims to systematize and clarify the scattered literature on this subject, providing a systematic review that underscores the importance of strategic reasoning as a critical cognitive capability and offers insights into future research directions and potential improvements.
Submitted 1 April, 2024; originally announced April 2024.
Comments: 9 pages, 5 figures

arXiv:2403.13254 (https://arxiv.org/abs/2403.13254) [pdf, other]
Subjects: Sound (cs.SD); Audio and Speech Processing (eess.AS)
Title: Onset and offset weighted loss function for sound event detection
Authors: Tao Song
Abstract: In a typical sound event detection (SED) system, the existence of a sound event is detected at a frame level, and consecutive frames with the same event detected are combined as one sound event. The median filter is applied as a post-processing step to remove detection errors as much as possible. However, detection errors occurring around the onset and offset of a sound event are beyond the capacity of the median filter. To address this issue, an onset and offset weighted binary cross-entropy (OWBCE) loss function is proposed in this paper, which trains the DNN model to be more robust on frames around onsets and offsets. Experiments are carried out in the context of DCASE 2022 task 4. Results show that OWBCE outperforms BCE when different models are considered.
For a basic CRNN, relative improvements of 6.43% in event-F1, 1.96% in PSDS1, and 2.43% in PSDS2 can be achieved by OWBCE. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.13254v1-abstract-full').style.display = 'none'; document.getElementById('2403.13254v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.13252">arXiv:2403.13252</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.13252">pdf</a>, <a href="https://arxiv.org/format/2403.13252">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Frequency-aware convolution for sound event detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Song%2C+T">Tao Song</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+W">WenWen Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.13252v2-abstract-short" style="display: inline;"> In sound event detection (SED), convolutional neural networks (CNNs) are widely employed to extract time-frequency (TF) patterns from spectrograms. However, the ability of CNNs to recognize different sound events is limited by their insensitivity to shifts of TF patterns along the frequency dimension, caused by translation equivariance. To address this issue, a model called frequency dynamic convo&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.13252v2-abstract-full').style.display = 'inline'; document.getElementById('2403.13252v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.13252v2-abstract-full" style="display: none;"> In sound event detection (SED), convolutional neural networks (CNNs) are widely employed to extract time-frequency (TF) patterns from spectrograms. However, the ability of CNNs to recognize different sound events is limited by their insensitivity to shifts of TF patterns along the frequency dimension, caused by translation equivariance. To address this issue, a model called frequency dynamic convolution (FDY) has been proposed, which involves applying specific convolution kernels to different frequency components. However, FDY requires a significantly larger number of parameters and computational resources compared to a standard CNN. This paper proposes a more efficient solution called frequency-aware convolution (FAC). FAC incorporates frequency positional information by encoding it in a vector, which is then explicitly added to the input spectrogram. To ensure that the amplitude of the encoding vector matches that of the input spectrogram, the encoding vector is adaptively and channel-dependently scaled using self-attention. 
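<p class="is-size-7 has-text-grey-dark">A rough sketch of this input-side encoding, with a learnable per-bin frequency code added to the spectrogram; a simple channel-wise gate stands in for the self-attention-based scaling, whose exact form is not given here:</p> <pre class="is-size-7"><code>import torch
import torch.nn as nn

class FreqAwareInput(nn.Module):
    """Adds a learnable frequency-position code to a spectrogram of shape
    (batch, channels, freq_bins, time_frames); the gate below is an
    illustrative stand-in for the self-attention based scaling."""
    def __init__(self, channels, freq_bins):
        super().__init__()
        self.freq_code = nn.Parameter(torch.randn(freq_bins))  # one value per bin
        self.gate = nn.Sequential(nn.Linear(channels, channels), nn.Sigmoid())

    def forward(self, x):
        b, c, f, t = x.shape
        scale = self.gate(x.mean(dim=(2, 3)))       # (b, c): adapts to the input
        code = self.freq_code.view(1, 1, f, 1)      # broadcast over batch, channel, time
        return x + scale.view(b, c, 1, 1) * code
</code></pre>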
To evaluate the effectiveness of FAC, we conducted experiments within the context of the DCASE 2023 task 4. The results show that FAC achieves comparable performance to FDY while requiring only an additional 515 parameters, whereas FDY necessitates an additional 8.02 million parameters. Furthermore, an ablation study confirms that the adaptive and channel-dependent scaling of the encoding vector is critical to the performance of FAC. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.13252v2-abstract-full').style.display = 'none'; document.getElementById('2403.13252v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.11162">arXiv:2403.11162</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.11162">pdf</a>, <a href="https://arxiv.org/format/2403.11162">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> CGI-DM: Digital Copyright Authentication for Diffusion Models via Contrasting Gradient Inversion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wu%2C+X">Xiaoyu Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Hua%2C+Y">Yang Hua</a>, <a href="/search/cs?searchtype=author&amp;query=Liang%2C+C">Chumeng Liang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+J">Jiaru Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+H">Hao Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+T">Tao Song</a>, <a href="/search/cs?searchtype=author&amp;query=Guan%2C+H">Haibing Guan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.11162v1-abstract-short" style="display: inline;"> Diffusion Models (DMs) have evolved into advanced image generation tools, especially for few-shot generation where a pretrained model is fine-tuned on a small set of images to capture a specific style or object. Despite their success, concerns exist about potential copyright violations stemming from the use of unauthorized data in this process. 
In response, we present Contrasting Gradient Inversio&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.11162v1-abstract-full').style.display = 'inline'; document.getElementById('2403.11162v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.11162v1-abstract-full" style="display: none;"> Diffusion Models (DMs) have evolved into advanced image generation tools, especially for few-shot generation where a pretrained model is fine-tuned on a small set of images to capture a specific style or object. Despite their success, concerns exist about potential copyright violations stemming from the use of unauthorized data in this process. In response, we present Contrasting Gradient Inversion for Diffusion Models (CGI-DM), a novel method featuring vivid visual representations for digital copyright authentication. Our approach involves removing partial information of an image and recovering missing details by exploiting conceptual differences between the pretrained and fine-tuned models. We formulate the differences as KL divergence between latent variables of the two models when given the same input image, which can be maximized through Monte Carlo sampling and Projected Gradient Descent (PGD). The similarity between original and recovered images serves as a strong indicator of potential infringements. Extensive experiments on the WikiArt and Dreambooth datasets demonstrate the high accuracy of CGI-DM in digital copyright authentication, surpassing alternative validation techniques. Code implementation is available at https://github.com/Nicholas0228/Revelio. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.11162v1-abstract-full').style.display = 'none'; document.getElementById('2403.11162v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. 
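<p class="is-size-7 has-text-grey-dark">A simplified sketch of the recovery step described above, with hypothetical <code>model_pre</code> and <code>model_ft</code> callables and a squared output difference as a stand-in for the Monte-Carlo KL estimate between latent variables:</p> <pre class="is-size-7"><code>import torch

def pgd_contrast(x_masked, model_pre, model_ft, steps=20, step_size=0.01, eps=0.1):
    """Projected gradient ascent on a partially masked image to maximise a
    divergence between the pretrained and fine-tuned models' outputs.
    The squared-difference objective is an illustrative surrogate."""
    x0 = x_masked.clone()
    x = x_masked.clone().requires_grad_(True)
    for _ in range(steps):
        gap = (model_ft(x) - model_pre(x)).pow(2).mean()   # divergence surrogate
        gap.backward()
        with torch.no_grad():
            x += step_size * x.grad.sign()                 # ascent step
            x.copy_(x0 + (x - x0).clamp(-eps, eps))        # project into eps-ball
            x.grad.zero_()
    return x.detach()
</code></pre>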
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by CVPR 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.03742">arXiv:2403.03742</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.03742">pdf</a>, <a href="https://arxiv.org/format/2403.03742">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Mitigating Ageism through Virtual Reality: Intergenerational Collaborative Escape Room Design </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zou%2C+R">Ruotong Zou</a>, <a href="/search/cs?searchtype=author&amp;query=Yin%2C+S">Shuyu Yin</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+T">Tianqi Song</a>, <a href="/search/cs?searchtype=author&amp;query=Qin%2C+P">Peinuan Qin</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+Y">Yi-Chieh Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.03742v1-abstract-short" style="display: inline;"> As virtual reality (VR) becomes more popular for intergenerational collaboration, there is still a significant gap in research regarding understanding the potential for reducing ageism. Our study aims to address this gap by analyzing ageism levels before and after VR escape room collaborative experiences. We recruited 28 participants to collaborate with an older player in a challenging VR escape r&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.03742v1-abstract-full').style.display = 'inline'; document.getElementById('2403.03742v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.03742v1-abstract-full" style="display: none;"> As virtual reality (VR) becomes more popular for intergenerational collaboration, there is still a significant gap in research regarding understanding the potential for reducing ageism. Our study aims to address this gap by analyzing ageism levels before and after VR escape room collaborative experiences. We recruited 28 participants to collaborate with an older player in a challenging VR escape room game. To ensure consistent and reliable performance data of older players, our experimenters simulated older participants following specific guidelines. After completing the game, we found a significant reduction in ageism among younger participants. Furthermore, we introduce a new game mechanism that encourages intergenerational collaboration. Our research highlights the potential of VR collaborative games as a practical tool for mitigating ageism. It provides valuable insights for designing immersive VR experiences that foster enhanced intergenerational collaboration. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.03742v1-abstract-full').style.display = 'none'; document.getElementById('2403.03742v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.00193">arXiv:2403.00193</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.00193">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Structural Resilience and Connectivity of the IPv6 Internet: An AS-level Topology Examination </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yuan%2C+B">Bin Yuan</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+T">Tianbo Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.00193v1-abstract-short" style="display: inline;"> The study utilizes a comprehensive dataset informed by IPv6 routing information to provide statistics, degree distribution, joint degree distribution, and clustering analysis of the IPv6 Internet&#39;s structure and resilience.The dataset includes 17,232 unique ASes and 10,000 unique IPv6 prefixes. Analysis reveals an interconnected network with an average path length of approximately 3 hops, suggesti&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.00193v1-abstract-full').style.display = 'inline'; document.getElementById('2403.00193v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.00193v1-abstract-full" style="display: none;"> The study utilizes a comprehensive dataset informed by IPv6 routing information to provide statistics, degree distribution, joint degree distribution, and clustering analysis of the IPv6 Internet&#39;s structure and resilience.The dataset includes 17,232 unique ASes and 10,000 unique IPv6 prefixes. Analysis reveals an interconnected network with an average path length of approximately 3 hops, suggesting a robust and efficient network with potential redundancy and resilience, despite some isolated components. The paper outlines the degree distribution, indicating many peripheral nodes in a sparse network, and a clustering analysis showing a tendency for ASes to form clusters, which is indicative of redundancy and robustness against failures. The connectivity analysis, including path redundancy and reachability, supports the network&#39;s resilience.The findings are crucial for network design and strategic planning, particularly as IPv6 adoption increases. The paper emphasizes the importance of continuous monitoring and improvement of network connectivity in the evolving Internet landscape, highlighting the IPv6 Internet&#39;s resilience and structured connectivity. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.00193v1-abstract-full').style.display = 'none'; document.getElementById('2403.00193v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.00190">arXiv:2403.00190</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.00190">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Identification of important nodes in the information propagation network based on the artificial intelligence method </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yuan%2C+B">Bin Yuan</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+T">Tianbo Song</a>, <a href="/search/cs?searchtype=author&amp;query=Yao%2C+J">Jerry Yao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.00190v1-abstract-short" style="display: inline;"> This study presents an integrated approach for identifying key nodes in information propagation networks using advanced artificial intelligence methods. We introduce a novel technique that combines the Decision-making Trial and Evaluation Laboratory (DEMATEL) method with the Global Structure Model (GSM), creating a synergistic model that effectively captures both local and global influences within&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.00190v1-abstract-full').style.display = 'inline'; document.getElementById('2403.00190v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.00190v1-abstract-full" style="display: none;"> This study presents an integrated approach for identifying key nodes in information propagation networks using advanced artificial intelligence methods. We introduce a novel technique that combines the Decision-making Trial and Evaluation Laboratory (DEMATEL) method with the Global Structure Model (GSM), creating a synergistic model that effectively captures both local and global influences within a network. This method is applied across various complex networks, such as social, transportation, and communication systems, utilizing the Global Network Influence Dataset (GNID). Our analysis highlights the structural dynamics and resilience of these networks, revealing insights into node connectivity and community formation. The findings demonstrate the effectiveness of our AI-based approach in offering a comprehensive understanding of network behavior, contributing significantly to strategic network analysis and optimization. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.00190v1-abstract-full').style.display = 'none'; document.getElementById('2403.00190v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.18923">arXiv:2402.18923</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.18923">pdf</a>, <a href="https://arxiv.org/format/2402.18923">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Inappropriate Pause Detection In Dysarthric Speech Using Large-Scale Speech Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lee%2C+J">Jeehyun Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Choi%2C+Y">Yerin Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+T">Tae-Jin Song</a>, <a href="/search/cs?searchtype=author&amp;query=Koo%2C+M">Myoung-Wan Koo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.18923v1-abstract-short" style="display: inline;"> Dysarthria, a common issue among stroke patients, severely impacts speech intelligibility. Inappropriate pauses are crucial indicators in severity assessment and speech-language therapy. We propose to extend a large-scale speech recognition model for inappropriate pause detection in dysarthric speech. To this end, we propose task design, labeling strategy, and a speech recognition model with an in&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.18923v1-abstract-full').style.display = 'inline'; document.getElementById('2402.18923v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.18923v1-abstract-full" style="display: none;"> Dysarthria, a common issue among stroke patients, severely impacts speech intelligibility. Inappropriate pauses are crucial indicators in severity assessment and speech-language therapy. We propose to extend a large-scale speech recognition model for inappropriate pause detection in dysarthric speech. To this end, we propose task design, labeling strategy, and a speech recognition model with an inappropriate pause prediction layer. First, we treat pause detection as speech recognition, using an automatic speech recognition (ASR) model to convert speech into text with pause tags. According to the newly designed task, we label pause locations at the text level and their appropriateness. We collaborate with speech-language pathologists to establish labeling criteria, ensuring high-quality annotated data. Finally, we extend the ASR model with an inappropriate pause prediction layer for end-to-end inappropriate pause detection. 
Moreover, we propose a task-tailored metric for evaluating inappropriate pause detection independent of ASR performance. Our experiments show that the proposed method better detects inappropriate pauses in dysarthric speech than baselines. (Inappropriate Pause Error Rate: 14.47%) <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.18923v1-abstract-full').style.display = 'none'; document.getElementById('2402.18923v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ICASSP 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.12994">arXiv:2401.12994</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2401.12994">pdf</a>, <a href="https://arxiv.org/format/2401.12994">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Automated Scoring of Clinical Patient Notes using Advanced NLP and Pseudo Labeling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xu%2C+J">Jingyu Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+Y">Yifeng Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Yuan%2C+B">Bin Yuan</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+S">Shulin Li</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+T">Tianbo Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.12994v1-abstract-short" style="display: inline;"> Clinical patient notes are critical for documenting patient interactions, diagnoses, and treatment plans in medical practice. Ensuring accurate evaluation of these notes is essential for medical education and certification. However, manual evaluation is complex and time-consuming, often resulting in variability and resource-intensive assessments. To tackle these challenges, this research introduce&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.12994v1-abstract-full').style.display = 'inline'; document.getElementById('2401.12994v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.12994v1-abstract-full" style="display: none;"> Clinical patient notes are critical for documenting patient interactions, diagnoses, and treatment plans in medical practice. Ensuring accurate evaluation of these notes is essential for medical education and certification. However, manual evaluation is complex and time-consuming, often resulting in variability and resource-intensive assessments. To tackle these challenges, this research introduces an approach leveraging state-of-the-art Natural Language Processing (NLP) techniques, specifically Masked Language Modeling (MLM) pretraining, and pseudo labeling. 
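<p class="is-size-7 has-text-grey-dark">A generic pseudo-labeling pass of the kind referred to here, with a hypothetical <code>model.predict_proba</code> interface and an illustrative confidence threshold; the paper's exact rule is not stated in this abstract:</p> <pre class="is-size-7"><code>def pseudo_label(model, unlabeled_texts, threshold=0.9):
    """Keep predictions on unlabeled notes only when the model is confident,
    then return them as extra (text, label) training pairs."""
    pseudo = []
    for text in unlabeled_texts:
        label, confidence = model.predict_proba(text)
        if confidence >= threshold:
            pseudo.append((text, label))
    return pseudo
</code></pre>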
Our methodology enhances efficiency and effectiveness, significantly reducing training time without compromising performance. Experimental results showcase improved model performance, indicating a potential transformation in clinical note assessment. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.12994v1-abstract-full').style.display = 'none'; document.getElementById('2401.12994v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.09699">arXiv:2401.09699</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2401.09699">pdf</a>, <a href="https://arxiv.org/format/2401.09699">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Curriculum Recommendations Using Transformer Base Model with InfoNCE Loss And Language Switching Method </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xu%2C+X">Xiaonan Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Yuan%2C+B">Bin Yuan</a>, <a href="/search/cs?searchtype=author&amp;query=Mo%2C+Y">Yongyao Mo</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+T">Tianbo Song</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+S">Shulin Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.09699v1-abstract-short" style="display: inline;"> The Curriculum Recommendations paradigm is dedicated to fostering learning equality within the ever-evolving realms of educational technology and curriculum development. In acknowledging the inherent obstacles posed by existing methodologies, such as content conflicts and disruptions from language translation, this paradigm aims to confront and overcome these challenges. Notably, it addresses cont&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.09699v1-abstract-full').style.display = 'inline'; document.getElementById('2401.09699v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.09699v1-abstract-full" style="display: none;"> The Curriculum Recommendations paradigm is dedicated to fostering learning equality within the ever-evolving realms of educational technology and curriculum development. In acknowledging the inherent obstacles posed by existing methodologies, such as content conflicts and disruptions from language translation, this paradigm aims to confront and overcome these challenges. Notably, it addresses content conflicts and disruptions introduced by language translation, hindrances that can impede the creation of an all-encompassing and personalized learning experience. 
The paradigm&#39;s objective is to cultivate an educational environment that not only embraces diversity but also customizes learning experiences to suit the distinct needs of each learner. To overcome these challenges, our approach builds upon notable contributions in curriculum development and personalized learning, introducing three key innovations. These include the integration of Transformer Base Model to enhance computational efficiency, the implementation of InfoNCE Loss for accurate content-topic matching, and the adoption of a language switching strategy to alleviate translation-related ambiguities. Together, these innovations aim to collectively tackle inherent challenges and contribute to forging a more equitable and effective learning journey for a diverse range of learners. Competitive cross-validation scores underscore the efficacy of sentence-transformers/LaBSE, achieving 0.66314, showcasing our methodology&#39;s effectiveness in diverse linguistic nuances for content alignment prediction. Index Terms-Curriculum Recommendation, Transformer model with InfoNCE Loss, Language Switching. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.09699v1-abstract-full').style.display = 'none'; document.getElementById('2401.09699v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">4pages, 2 figures, ICAICA2023</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 68T50 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.17507">arXiv:2312.17507</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2312.17507">pdf</a>, <a href="https://arxiv.org/format/2312.17507">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Actuator-Constrained Reinforcement Learning for High-Speed Quadrupedal Locomotion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Shin%2C+Y">Young-Ha Shin</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+T">Tae-Gyu Song</a>, <a href="/search/cs?searchtype=author&amp;query=Ji%2C+G">Gwanghyeon Ji</a>, <a href="/search/cs?searchtype=author&amp;query=Park%2C+H">Hae-Won Park</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.17507v1-abstract-short" style="display: inline;"> This paper presents a method for achieving high-speed running of a quadruped robot by considering the actuator torque-speed operating region in reinforcement learning. The physical properties and constraints of the actuator are included in the training process to reduce state transitions that are infeasible in the real world due to motor torque-speed limitations. 
The gait reward is designed to dis&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.17507v1-abstract-full').style.display = 'inline'; document.getElementById('2312.17507v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.17507v1-abstract-full" style="display: none;"> This paper presents a method for achieving high-speed running of a quadruped robot by considering the actuator torque-speed operating region in reinforcement learning. The physical properties and constraints of the actuator are included in the training process to reduce state transitions that are infeasible in the real world due to motor torque-speed limitations. The gait reward is designed to distribute motor torque evenly across all legs, contributing to more balanced power usage and mitigating performance bottlenecks due to single-motor saturation. Additionally, we designed a lightweight foot to enhance the robot&#39;s agility. We observed that applying the motor operating region as a constraint helps the policy network avoid infeasible areas during sampling. With the trained policy, KAIST Hound, a 45 kg quadruped robot, can run up to 6.5 m/s, which is the fastest speed among electric motor-based quadruped robots. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.17507v1-abstract-full').style.display = 'none'; document.getElementById('2312.17507v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.12484">arXiv:2312.12484</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2312.12484">pdf</a>, <a href="https://arxiv.org/format/2312.12484">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> SkyMask: Attack-agnostic Robust Federated Learning with Fine-grained Learnable Masks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yan%2C+P">Peishen Yan</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+H">Hao Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+T">Tao Song</a>, <a href="/search/cs?searchtype=author&amp;query=Hua%2C+Y">Yang Hua</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+R">Ruhui Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+N">Ningxin Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Haghighat%2C+M+R">Mohammad R. 
Haghighat</a>, <a href="/search/cs?searchtype=author&amp;query=Guan%2C+H">Haibing Guan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.12484v2-abstract-short" style="display: inline;"> Federated Learning (FL) is becoming a popular paradigm for leveraging distributed data and preserving data privacy. However, due to the distributed characteristic, FL systems are vulnerable to Byzantine attacks that compromised clients attack the global model by uploading malicious model updates. With the development of layer-level and parameter-level fine-grained attacks, the attacks&#39; stealthines&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.12484v2-abstract-full').style.display = 'inline'; document.getElementById('2312.12484v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.12484v2-abstract-full" style="display: none;"> Federated Learning (FL) is becoming a popular paradigm for leveraging distributed data and preserving data privacy. However, due to the distributed characteristic, FL systems are vulnerable to Byzantine attacks that compromised clients attack the global model by uploading malicious model updates. With the development of layer-level and parameter-level fine-grained attacks, the attacks&#39; stealthiness and effectiveness have been significantly improved. The existing defense mechanisms solely analyze the model-level statistics of individual model updates uploaded by clients to mitigate Byzantine attacks, which are ineffective against fine-grained attacks due to unawareness or overreaction. To address this problem, we propose SkyMask, a new attack-agnostic robust FL system that firstly leverages fine-grained learnable masks to identify malicious model updates at the parameter level. Specifically, the FL server freezes and multiplies the model updates uploaded by clients with the parameter-level masks, and trains the masks over a small clean dataset (i.e., root dataset) to learn the subtle difference between benign and malicious model updates in a high-dimension space. Our extensive experiments involve different models on three public datasets under state-of-the-art (SOTA) attacks, where the results show that SkyMask achieves up to 14% higher testing accuracy compared with SOTA defense strategies under the same attacks and successfully defends against attacks with malicious clients of a high fraction up to 80%. Code is available at https://github.com/KoalaYan/SkyMask. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.12484v2-abstract-full').style.display = 'none'; document.getElementById('2312.12484v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ECCV2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.06169">arXiv:2312.06169</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2312.06169">pdf</a>, <a href="https://arxiv.org/format/2312.06169">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.3390/rs16112024">10.3390/rs16112024 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Two-Stage Adaptive Network for Semi-Supervised Cross-Domain Crater Detection under Varying Scenario Distributions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Y">Yifan Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+T">Tiecheng Song</a>, <a href="/search/cs?searchtype=author&amp;query=Xian%2C+C">Chengye Xian</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+R">Ruiyuan Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+Y">Yi Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+R">Rui Li</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+T">Tan Guo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.06169v2-abstract-short" style="display: inline;"> Crater detection can provide valuable information for humans to explore the topography and understand the history of extraterrestrial planets. Due to the significantly varying scenario distributions, existing detection models trained on known labelled crater datasets are hardly effective when applied to new unlabelled planets. To address this issue, we propose a two-stage adaptive network (TAN) fo&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.06169v2-abstract-full').style.display = 'inline'; document.getElementById('2312.06169v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.06169v2-abstract-full" style="display: none;"> Crater detection can provide valuable information for humans to explore the topography and understand the history of extraterrestrial planets. Due to the significantly varying scenario distributions, existing detection models trained on known labelled crater datasets are hardly effective when applied to new unlabelled planets. To address this issue, we propose a two-stage adaptive network (TAN) for semi-supervised cross-domain crater detection. Our network is built on the YOLOv5 detector, where a series of strategies are employed to enhance its cross-domain generalisation ability. In the first stage, we propose an attention-based scale-adaptive fusion (ASAF) strategy to handle objects with significant scale variances. 
Furthermore, we propose a smoothing hard example mining (SHEM) loss function to address the issue of overfitting on hard examples. In the second stage, we propose a sort-based pseudo-labelling fine-tuning (SPF) strategy for semi-supervised learning to mitigate the distributional differences between source and target domains. For both stages, we employ weak or strong image augmentation to suit different cross-domain tasks. Experimental results on benchmark datasets demonstrate that the proposed network can enhance domain adaptation ability for crater detection under varying scenario distributions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.06169v2-abstract-full').style.display = 'none'; document.getElementById('2312.06169v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Liu, Y.; Song, T.; Xian, C.; Chen, R.; Zhao, Y.; Li, R.; Guo, T. Two-Stage Adaptive Network for Semi-Supervised Cross-Domain Crater Detection under Varying Scenario Distributions. Remote Sens. 2024, 16, 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.04992">arXiv:2312.04992</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2312.04992">pdf</a>, <a href="https://arxiv.org/ps/2312.04992">ps</a>, <a href="https://arxiv.org/format/2312.04992">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> PFLlib: Personalized Federated Learning Algorithm Library </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+J">Jianqing Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Y">Yang Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Hua%2C+Y">Yang Hua</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+H">Hao Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+T">Tao Song</a>, <a href="/search/cs?searchtype=author&amp;query=Xue%2C+Z">Zhengui Xue</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+R">Ruhui Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Cao%2C+J">Jian Cao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.04992v1-abstract-short" style="display: inline;"> Amid the ongoing advancements in Federated Learning (FL), a machine learning paradigm that allows collaborative learning with data privacy protection, personalized FL (pFL) has gained significant prominence as a research direction within the FL domain. 
Whereas traditional FL (tFL) focuses on jointly learning a global model, pFL aims to achieve a balance between the global and personalized objectiv&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.04992v1-abstract-full').style.display = 'inline'; document.getElementById('2312.04992v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.04992v1-abstract-full" style="display: none;"> Amid the ongoing advancements in Federated Learning (FL), a machine learning paradigm that allows collaborative learning with data privacy protection, personalized FL (pFL) has gained significant prominence as a research direction within the FL domain. Whereas traditional FL (tFL) focuses on jointly learning a global model, pFL aims to achieve a balance between the global and personalized objectives of each client in FL settings. To foster the pFL research community, we propose PFLlib, a comprehensive pFL algorithm library with an integrated evaluation platform. In PFLlib, We implement 34 state-of-the-art FL algorithms (including 7 classic tFL algorithms and 27 pFL algorithms) and provide various evaluation environments with three statistically heterogeneous scenarios and 14 datasets. At present, PFLlib has already gained 850 stars and 199 forks on GitHub. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.04992v1-abstract-full').style.display = 'none'; document.getElementById('2312.04992v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.14975">arXiv:2311.14975</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2311.14975">pdf</a>, <a href="https://arxiv.org/format/2311.14975">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> Eliminating Domain Bias for Federated Learning in Representation Space </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+J">Jianqing Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Hua%2C+Y">Yang Hua</a>, <a href="/search/cs?searchtype=author&amp;query=Cao%2C+J">Jian Cao</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+H">Hao Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+T">Tao Song</a>, <a href="/search/cs?searchtype=author&amp;query=Xue%2C+Z">Zhengui Xue</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+R">Ruhui Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Guan%2C+H">Haibing Guan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.14975v1-abstract-short" style="display: inline;"> Recently, federated learning (FL) is popular for its privacy-preserving and collaborative learning abilities. 
However, under statistically heterogeneous scenarios, we observe that biased data domains on clients cause a representation bias phenomenon and further degenerate generic representations during local training, i.e., the representation degeneration phenomenon. To address these issues, we pr&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.14975v1-abstract-full').style.display = 'inline'; document.getElementById('2311.14975v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.14975v1-abstract-full" style="display: none;"> Recently, federated learning (FL) is popular for its privacy-preserving and collaborative learning abilities. However, under statistically heterogeneous scenarios, we observe that biased data domains on clients cause a representation bias phenomenon and further degenerate generic representations during local training, i.e., the representation degeneration phenomenon. To address these issues, we propose a general framework Domain Bias Eliminator (DBE) for FL. Our theoretical analysis reveals that DBE can promote bi-directional knowledge transfer between server and client, as it reduces the domain discrepancy between server and client in representation space. Besides, extensive experiments on four datasets show that DBE can greatly improve existing FL methods in both generalization and personalization abilities. The DBE-equipped FL method can outperform ten state-of-the-art personalized FL methods by a large margin. Our code is public at https://github.com/TsingZ0/DBE. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.14975v1-abstract-full').style.display = 'none'; document.getElementById('2311.14975v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by NeurIPS 2023, 24 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.11587">arXiv:2311.11587</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2311.11587">pdf</a>, <a href="https://arxiv.org/format/2311.11587">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.imavis.2024.105190">10.1016/j.imavis.2024.105190 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> LDConv: Linear deformable convolution for improving convolutional neural networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+X">Xin Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+Y">Yingze Song</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+T">Tingting Song</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+D">Degang Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Ye%2C+Y">Yichen Ye</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+J">Jie Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+L">Liming Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.11587v3-abstract-short" style="display: inline;"> Neural networks based on convolutional operations have achieved remarkable results in the field of deep learning, but there are two inherent flaws in standard convolutional operations. On the one hand, the convolution operation is confined to a local window, so it cannot capture information from other locations, and its sampled shapes is fixed. On the other hand, the size of the convolutional kern&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.11587v3-abstract-full').style.display = 'inline'; document.getElementById('2311.11587v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.11587v3-abstract-full" style="display: none;"> Neural networks based on convolutional operations have achieved remarkable results in the field of deep learning, but there are two inherent flaws in standard convolutional operations. On the one hand, the convolution operation is confined to a local window, so it cannot capture information from other locations, and its sampled shapes is fixed. On the other hand, the size of the convolutional kernel are fixed to k $\times$ k, which is a fixed square shape, and the number of parameters tends to grow squarely with size. Although Deformable Convolution (Deformable Conv) address the problem of fixed sampling of standard convolutions, the number of parameters also tends to grow in a squared manner. 
In response to the above questions, the Linear Deformable Convolution (LDConv) is explored in this work, which gives the convolution kernel an arbitrary number of parameters and arbitrary sampled shapes to provide richer options for the trade-off between network overhead and performance. In LDConv, a novel coordinate generation algorithm is defined to generate different initial sampled positions for convolutional kernels of arbitrary size. To adapt to changing targets, offsets are introduced to adjust the shape of the samples at each position. LDConv corrects the growth trend of the number of parameters for standard convolution and Deformable Conv to a linear growth. Moreover, it completes the process of efficient feature extraction by irregular convolutional operations and brings more exploration options for convolutional sampled shapes. Object detection experiments on representative datasets COCO2017, VOC 7+12, and VisDrone-DET2021 fully demonstrate the advantages of LDConv. LDConv is a plug-and-play convolutional operation that can replace the convolutional operation to improve network performance. The code for the relevant tasks can be found at https://github.com/CV-ZhangXin/LDConv. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.11587v3-abstract-full').style.display = 'none'; document.getElementById('2311.11587v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages, 9 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Image and Vision Computing, 105190 (2024) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.08024">arXiv:2311.08024</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2311.08024">pdf</a>, <a href="https://arxiv.org/format/2311.08024">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> MD-IQA: Learning Multi-scale Distributed Image Quality Assessment with Semi Supervised Learning for Low Dose CT </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Song%2C+T">Tao Song</a>, <a href="/search/cs?searchtype=author&amp;query=Hou%2C+R">Ruizhi Hou</a>, <a href="/search/cs?searchtype=author&amp;query=Dai%2C+L">Lisong Dai</a>, <a href="/search/cs?searchtype=author&amp;query=Xiang%2C+L">Lei Xiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.08024v1-abstract-short" style="display: inline;"> Image quality assessment 
(IQA) plays a critical role in optimizing radiation dose and developing novel medical imaging techniques in computed tomography (CT). Traditional IQA methods relying on hand-crafted features have limitations in summarizing the subjective perceptual experience of image quality. Recent deep learning-based approaches have demonstrated strong modeling capabilities and potentia&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.08024v1-abstract-full').style.display = 'inline'; document.getElementById('2311.08024v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.08024v1-abstract-full" style="display: none;"> Image quality assessment (IQA) plays a critical role in optimizing radiation dose and developing novel medical imaging techniques in computed tomography (CT). Traditional IQA methods relying on hand-crafted features have limitations in summarizing the subjective perceptual experience of image quality. Recent deep learning-based approaches have demonstrated strong modeling capabilities and potential for medical IQA, but challenges remain regarding model generalization and perceptual accuracy. In this work, we propose a multi-scale distributions regression approach to predict quality scores by constraining the output distribution, thereby improving model generalization. Furthermore, we design a dual-branch alignment network to enhance feature extraction capabilities. Additionally, semi-supervised learning is introduced by utilizing pseudo-labels for unlabeled data to guide model training. Extensive qualitative experiments demonstrate the effectiveness of our proposed method for advancing the state-of-the-art in deep learning-based medical IQA. Code is available at: https://github.com/zunzhumu/MD-IQA. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.08024v1-abstract-full').style.display = 'none'; document.getElementById('2311.08024v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.05462">arXiv:2311.05462</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2311.05462">pdf</a>, <a href="https://arxiv.org/format/2311.05462">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> ChatGPT and Other Large Language Models for Cybersecurity of Smart Grid Applications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zaboli%2C+A">Aydin Zaboli</a>, <a href="/search/cs?searchtype=author&amp;query=Choi%2C+S+L">Seong Lok Choi</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+T">Tai-Jin Song</a>, <a href="/search/cs?searchtype=author&amp;query=Hong%2C+J">Junho Hong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.05462v2-abstract-short" style="display: inline;"> Cybersecurity breaches targeting electrical substations constitute a significant threat to the integrity of the power grid, necessitating comprehensive defense and mitigation strategies. Any anomaly in information and communication technology (ICT) should be detected for secure communications between devices in digital substations. This paper proposes large language models (LLM), e.g., ChatGPT, fo&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.05462v2-abstract-full').style.display = 'inline'; document.getElementById('2311.05462v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.05462v2-abstract-full" style="display: none;"> Cybersecurity breaches targeting electrical substations constitute a significant threat to the integrity of the power grid, necessitating comprehensive defense and mitigation strategies. Any anomaly in information and communication technology (ICT) should be detected for secure communications between devices in digital substations. This paper proposes large language models (LLM), e.g., ChatGPT, for the cybersecurity of IEC 61850-based digital substation communications. Multicast messages such as generic object oriented system event (GOOSE) and sampled value (SV) are used for case studies. The proposed LLM-based cybersecurity framework includes, for the first time, data pre-processing of communication systems and human-in-the-loop (HITL) training (considering the cybersecurity guidelines recommended by humans). The results show a comparative analysis of detected anomaly data carried out based on the performance evaluation metrics for different LLMs. A hardware-in-the-loop (HIL) testbed is used to generate and extract dataset of IEC 61850 communications. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.05462v2-abstract-full').style.display = 'none'; document.getElementById('2311.05462v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 2 figures, Accepted, 2024 IEEE Power &amp; Energy Society General Meeting (PESGM), Seattle, WA, USA</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.19202">arXiv:2310.19202</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2310.19202">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Improved Motor Imagery Classification Using Adaptive Spatial Filters Based on Particle Swarm Optimization Algorithm </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xiong%2C+X">Xiong Xiong</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Ying Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+T">Tianyuan Song</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+J">Jinguo Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+G">Guixia Kang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.19202v1-abstract-short" style="display: inline;"> As a typical self-paced brain-computer interface (BCI) system, the motor imagery (MI) BCI has been widely applied in fields such as robot control, stroke rehabilitation, and assistance for patients with stroke or spinal cord injury. Many studies have focused on the traditional spatial filters obtained through the common spatial pattern (CSP) method. However, the CSP method can only obtain fixed sp&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.19202v1-abstract-full').style.display = 'inline'; document.getElementById('2310.19202v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.19202v1-abstract-full" style="display: none;"> As a typical self-paced brain-computer interface (BCI) system, the motor imagery (MI) BCI has been widely applied in fields such as robot control, stroke rehabilitation, and assistance for patients with stroke or spinal cord injury. Many studies have focused on the traditional spatial filters obtained through the common spatial pattern (CSP) method. However, the CSP method can only obtain fixed spatial filters for specific input signals. 
Besides, the CSP method only focuses on the variance difference of two types of electroencephalogram (EEG) signals, so the decoding ability of EEG signals is limited. To obtain more effective spatial filters for better extraction of spatial features that can improve the classification of MI-EEG, this paper proposes an adaptive spatial filter solving method based on the particle swarm optimization (PSO) algorithm. A training and testing framework based on a filter bank and spatial filters (FBCSP-ASP) is designed for MI EEG signal classification. Comparative experiments are conducted on two public datasets (2a and 2b) from BCI competition IV, which show the outstanding average recognition accuracy of FBCSP-ASP. The proposed method has achieved significant performance improvement on MI-BCI. The classification accuracy of the proposed method has reached 74.61% and 81.19% on datasets 2a and 2b, respectively. Compared with the baseline algorithm (FBCSP), the proposed algorithm improves accuracy by 11.44% and 7.11% on the two datasets, respectively. Furthermore, the analysis based on mutual information, t-SNE and Shapley values further proves that ASP features have excellent decoding ability for MI-EEG signals, and explains the improvement of classification performance by the introduction of ASP features. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.19202v1-abstract-full').style.display = 'none'; document.getElementById('2310.19202v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">25 pages, 8 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.16402">arXiv:2310.16402</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2310.16402">pdf</a>, <a href="https://arxiv.org/format/2310.16402">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Video Referring Expression Comprehension via Transformer with Content-conditioned Query </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+J">Ji Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Cao%2C+M">Meng Cao</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+T">Tengtao Song</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+L">Long Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yi Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Zou%2C+Y">Yuexian Zou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.16402v1-abstract-short" style="display: inline;"> Video Referring Expression Comprehension (REC) aims to localize a target object in videos based on the queried natural language. 
Recent improvements in video REC have been made using Transformer-based methods with learnable queries. However, we contend that this naive query design is not ideal given the open-world nature of video REC brought by text supervision. With numerous potential semantic ca&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.16402v1-abstract-full').style.display = 'inline'; document.getElementById('2310.16402v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.16402v1-abstract-full" style="display: none;"> Video Referring Expression Comprehension (REC) aims to localize a target object in videos based on the queried natural language. Recent improvements in video REC have been made using Transformer-based methods with learnable queries. However, we contend that this naive query design is not ideal given the open-world nature of video REC brought by text supervision. With numerous potential semantic categories, relying on only a few slow-updated queries is insufficient to characterize them. Our solution to this problem is to create dynamic queries that are conditioned on both the input video and language to model the diverse objects referred to. Specifically, we place a fixed number of learnable bounding boxes throughout the frame and use corresponding region features to provide prior information. Also, we noticed that current query features overlook the importance of cross-modal alignment. To address this, we align specific phrases in the sentence with semantically relevant visual areas, annotating them in existing video datasets (VID-Sentence and VidSTG). By incorporating these two designs, our proposed model (called ConFormer) outperforms other models on widely benchmarked datasets. For example, in the testing split of VID-Sentence dataset, ConFormer achieves 8.75% absolute improvement on Accu.@0.6 compared to the previous state-of-the-art model. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.16402v1-abstract-full').style.display = 'none'; document.getElementById('2310.16402v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ACM International Conference on Multimedia Workshop (ACM MM), 2023. arXiv admin note: substantial text overlap with arXiv:2210.02953</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.16354">arXiv:2310.16354</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2310.16354">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> </div> </div> <p class="title is-5 mathjax"> RAMPART: RowHammer Mitigation and Repair for Server Memory Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Woo%2C+S+C">Steven C. 
Woo</a>, <a href="/search/cs?searchtype=author&amp;query=Elsasser%2C+W">Wendy Elsasser</a>, <a href="/search/cs?searchtype=author&amp;query=Hamburg%2C+M">Mike Hamburg</a>, <a href="/search/cs?searchtype=author&amp;query=Linstadt%2C+E">Eric Linstadt</a>, <a href="/search/cs?searchtype=author&amp;query=Miller%2C+M+R">Michael R. Miller</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+T">Taeksang Song</a>, <a href="/search/cs?searchtype=author&amp;query=Tringali%2C+J">James Tringali</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.16354v1-abstract-short" style="display: inline;"> RowHammer attacks are a growing security and reliability concern for DRAMs and computer systems as they can induce many bit errors that overwhelm error detection and correction capabilities. System-level solutions are needed as process technology and circuit improvements alone are unlikely to provide complete protection against RowHammer attacks in the future. This paper introduces RAMPART, a nove&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.16354v1-abstract-full').style.display = 'inline'; document.getElementById('2310.16354v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.16354v1-abstract-full" style="display: none;"> RowHammer attacks are a growing security and reliability concern for DRAMs and computer systems as they can induce many bit errors that overwhelm error detection and correction capabilities. System-level solutions are needed as process technology and circuit improvements alone are unlikely to provide complete protection against RowHammer attacks in the future. This paper introduces RAMPART, a novel approach to mitigating RowHammer attacks and improving server memory system reliability by remapping addresses in each DRAM in a way that confines RowHammer bit flips to a single device for any victim row address. When RAMPART is paired with Single Device Data Correction (SDDC) and patrol scrub, error detection and correction methods in use today, the system can detect and correct bit flips from a successful attack, allowing the memory system to heal itself. RAMPART is compatible with DDR5 RowHammer mitigation features, as well as a wide variety of algorithmic and probabilistic tracking methods. We also introduce BRC-VL, a variation of DDR5 Bounded Refresh Configuration (BRC) that improves system performance by reducing mitigation overhead and show that it works well with probabilistic sampling methods to combat traditional and victim-focused mitigation attacks like Half-Double. The combination of RAMPART, SDDC, and scrubbing enables stronger RowHammer resistance by correcting bit flips from one successful attack. Uncorrectable errors are much less likely, requiring two successful attacks before the memory system is scrubbed. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.16354v1-abstract-full').style.display = 'none'; document.getElementById('2310.16354v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, 13 figures. A version of this paper will appear in the Proceedings of MEMSYS23</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> B.3.1; B.3.4 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.11715">arXiv:2309.11715</a> <span>&nbsp;&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Deshadow-Anything: When Segment Anything Model Meets Zero-shot shadow removal </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+X+F">Xiao Feng Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+T+Y">Tian Yi Song</a>, <a href="/search/cs?searchtype=author&amp;query=Yao%2C+J+W">Jia Wei Yao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.11715v3-abstract-short" style="display: inline;"> Segment Anything (SAM), an advanced universal image segmentation model trained on an expansive visual dataset, has set a new benchmark in image segmentation and computer vision. However, it faced challenges when it came to distinguishing between shadows and their backgrounds. To address this, we developed Deshadow-Anything, considering the generalization of large-scale datasets, and we performed F&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.11715v3-abstract-full').style.display = 'inline'; document.getElementById('2309.11715v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.11715v3-abstract-full" style="display: none;"> Segment Anything (SAM), an advanced universal image segmentation model trained on an expansive visual dataset, has set a new benchmark in image segmentation and computer vision. However, it faced challenges when it came to distinguishing between shadows and their backgrounds. To address this, we developed Deshadow-Anything, considering the generalization of large-scale datasets, and we performed Fine-tuning on large-scale datasets to achieve image shadow removal. The diffusion model can diffuse along the edges and textures of an image, helping to remove shadows while preserving the details of the image. Furthermore, we design Multi-Self-Attention Guidance (MSAG) and adaptive input perturbation (DDPM-AIP) to accelerate the iterative training speed of diffusion. Experiments on shadow removal tasks demonstrate that these methods can effectively improve image restoration performance. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.11715v3-abstract-full').style.display = 'none'; document.getElementById('2309.11715v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">it needs revised</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.09553">arXiv:2309.09553</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2309.09553">pdf</a>, <a href="https://arxiv.org/format/2309.09553">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> </div> </div> <p class="title is-5 mathjax"> Causal-Story: Local Causal Attention Utilizing Parameter-Efficient Tuning For Visual Story Synthesis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Song%2C+T">Tianyi Song</a>, <a href="/search/cs?searchtype=author&amp;query=Cao%2C+J">Jiuxin Cao</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+K">Kun Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+B">Bo Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+X">Xiaofeng Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.09553v4-abstract-short" style="display: inline;"> The excellent text-to-image synthesis capability of diffusion models has driven progress in synthesizing coherent visual stories. The current state-of-the-art method combines the features of historical captions, historical frames, and the current captions as conditions for generating the current frame. However, this method treats each historical frame and caption as the same contribution. It conne&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.09553v4-abstract-full').style.display = 'inline'; document.getElementById('2309.09553v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.09553v4-abstract-full" style="display: none;"> The excellent text-to-image synthesis capability of diffusion models has driven progress in synthesizing coherent visual stories. The current state-of-the-art method combines the features of historical captions, historical frames, and the current captions as conditions for generating the current frame. However, this method treats each historical frame and caption as the same contribution. It connects them in order with equal weights, ignoring that not all historical conditions are associated with the generation of the current frame. 
To address this issue, we propose Causal-Story. This model incorporates a local causal attention mechanism that considers the causal relationship between previous captions, frames, and current captions. By assigning weights based on this relationship, Causal-Story generates the current frame, thereby improving the global consistency of story generation. We evaluated our model on the PororoSV and FlintstonesSV datasets and obtained state-of-the-art FID scores, and the generated frames also demonstrate better storytelling in visuals. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.09553v4-abstract-full').style.display = 'none'; document.getElementById('2309.09553v4-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by 2024 International Conference on Acoustics, Speech and Signal Processing(ICASSP 2024)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.10279">arXiv:2308.10279</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2308.10279">pdf</a>, <a href="https://arxiv.org/format/2308.10279">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> GPFL: Simultaneously Learning Global and Personalized Feature Information for Personalized Federated Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+J">Jianqing Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Hua%2C+Y">Yang Hua</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+H">Hao Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+T">Tao Song</a>, <a href="/search/cs?searchtype=author&amp;query=Xue%2C+Z">Zhengui Xue</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+R">Ruhui Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Cao%2C+J">Jian Cao</a>, <a href="/search/cs?searchtype=author&amp;query=Guan%2C+H">Haibing Guan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.10279v3-abstract-short" style="display: inline;"> Federated Learning (FL) is popular for its privacy-preserving and collaborative learning capabilities. Recently, personalized FL (pFL) has received attention for its ability to address statistical heterogeneity and achieve personalization in FL. 
However, from the perspective of feature extraction, most existing pFL methods only focus on extracting global or personalized feature information during&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.10279v3-abstract-full').style.display = 'inline'; document.getElementById('2308.10279v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.10279v3-abstract-full" style="display: none;"> Federated Learning (FL) is popular for its privacy-preserving and collaborative learning capabilities. Recently, personalized FL (pFL) has received attention for its ability to address statistical heterogeneity and achieve personalization in FL. However, from the perspective of feature extraction, most existing pFL methods only focus on extracting global or personalized feature information during local training, which fails to meet the collaborative learning and personalization goals of pFL. To address this, we propose a new pFL method, named GPFL, to simultaneously learn global and personalized feature information on each client. We conduct extensive experiments on six datasets in three statistically heterogeneous settings and show the superiority of GPFL over ten state-of-the-art methods regarding effectiveness, scalability, fairness, stability, and privacy. Besides, GPFL mitigates overfitting and outperforms the baselines by up to 8.99% in accuracy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.10279v3-abstract-full').style.display = 'none'; document.getElementById('2308.10279v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ICCV2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.06945">arXiv:2308.06945</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2308.06945">pdf</a>, <a href="https://arxiv.org/format/2308.06945">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Semantic-aware Network for Aerial-to-Ground Image Synthesis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Jang%2C+J">Jinhyun Jang</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+T">Taeyong Song</a>, <a href="/search/cs?searchtype=author&amp;query=Sohn%2C+K">Kwanghoon Sohn</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.06945v1-abstract-short" style="display: inline;"> Aerial-to-ground image synthesis is an emerging and challenging problem that aims to synthesize a ground image from an aerial image. 
Due to the highly different layout and object representation between the aerial and ground images, existing approaches usually fail to transfer the components of the aerial scene into the ground scene. In this paper, we propose a novel framework to explore the challe&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.06945v1-abstract-full').style.display = 'inline'; document.getElementById('2308.06945v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.06945v1-abstract-full" style="display: none;"> Aerial-to-ground image synthesis is an emerging and challenging problem that aims to synthesize a ground image from an aerial image. Due to the highly different layout and object representation between the aerial and ground images, existing approaches usually fail to transfer the components of the aerial scene into the ground scene. In this paper, we propose a novel framework to explore the challenges by imposing enhanced structural alignment and semantic awareness. We introduce a novel semantic-attentive feature transformation module that allows to reconstruct the complex geographic structures by aligning the aerial feature to the ground layout. Furthermore, we propose semantic-aware loss functions by leveraging a pre-trained segmentation network. The network is enforced to synthesize realistic objects across various classes by separately calculating losses for different classes and balancing them. Extensive experiments including comparisons with previous methods and ablation studies show the effectiveness of the proposed framework both qualitatively and quantitatively. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.06945v1-abstract-full').style.display = 'none'; document.getElementById('2308.06945v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICIP 2021. 
Code is available at https://github.com/jinhyunj/SANet</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Song%2C+T&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Song%2C+T&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Song%2C+T&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Song%2C+T&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 
0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>
