
Search | arXiv e-print repository

<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 653 results for author: <span class="mathjax">Luo, X</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Luo%2C+X">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Luo, X"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Luo%2C+X&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Luo, X"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Luo%2C+X&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Luo%2C+X&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Luo%2C+X&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Luo%2C+X&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Luo%2C+X&amp;start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Luo%2C+X&amp;start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">&hellip;</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.11942">arXiv:2411.11942</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.11942">pdf</a>, <a href="https://arxiv.org/format/2411.11942">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Instrumentation and Detectors">physics.ins-det</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="High Energy Physics - Experiment">hep-ex</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Nuclear Experiment">nucl-ex</span> </div> </div> <p class="title is-5 mathjax"> Variable Rate Neural Compression for Sparse Detector Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Huang%2C+Y">Yi Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Go%2C+Y">Yeonju Go</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+J">Jin Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+S">Shuhang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Luo%2C+X">Xihaier Luo</a>, <a href="/search/cs?searchtype=author&amp;query=Marshall%2C+T">Thomas Marshall</a>, <a href="/search/cs?searchtype=author&amp;query=Osborn%2C+J">Joseph Osborn</a>, <a href="/search/cs?searchtype=author&amp;query=Pinkenburg%2C+C">Christopher Pinkenburg</a>, <a href="/search/cs?searchtype=author&amp;query=Ren%2C+Y">Yihui Ren</a>, <a href="/search/cs?searchtype=author&amp;query=Shulga%2C+E">Evgeny Shulga</a>, <a 
href="/search/cs?searchtype=author&amp;query=Yoo%2C+S">Shinjae Yoo</a>, <a href="/search/cs?searchtype=author&amp;query=Yoon%2C+B">Byung-Jun Yoon</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.11942v1-abstract-short" style="display: inline;"> High-energy large-scale particle colliders generate data at extraordinary rates. Developing real-time high-throughput data compression algorithms to reduce data volume and meet the bandwidth requirement for storage has become increasingly critical. Deep learning is a promising technology that can address this challenging topic. At the newly constructed sPHENIX experiment at the Relativistic Heavy&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11942v1-abstract-full').style.display = 'inline'; document.getElementById('2411.11942v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.11942v1-abstract-full" style="display: none;"> High-energy large-scale particle colliders generate data at extraordinary rates. Developing real-time high-throughput data compression algorithms to reduce data volume and meet the bandwidth requirement for storage has become increasingly critical. Deep learning is a promising technology that can address this challenging topic. At the newly constructed sPHENIX experiment at the Relativistic Heavy Ion Collider, a Time Projection Chamber (TPC) serves as the main tracking detector, which records three-dimensional particle trajectories in a volume of a gas-filled cylinder. In terms of occupancy, the resulting data flow can be very sparse reaching $10^{-3}$ for proton-proton collisions. Such sparsity presents a challenge to conventional learning-free lossy compression algorithms, such as SZ, ZFP, and MGARD. In contrast, emerging deep learning-based models, particularly those utilizing convolutional neural networks for compression, have outperformed these conventional methods in terms of compression ratios and reconstruction accuracy. However, research on the efficacy of these deep learning models in handling sparse datasets, like those produced in particle colliders, remains limited. Furthermore, most deep learning models do not adapt their processing speeds to data sparsity, which affects efficiency. To address this issue, we propose a novel approach for TPC data compression via key-point identification facilitated by sparse convolution. Our proposed algorithm, BCAE-VS, achieves a $75\%$ improvement in reconstruction accuracy with a $10\%$ increase in compression ratio over the previous state-of-the-art model. Additionally, BCAE-VS manages to achieve these results with a model size over two orders of magnitude smaller. Lastly, we have experimentally verified that as sparsity increases, so does the model&#39;s throughput. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11942v1-abstract-full').style.display = 'none'; document.getElementById('2411.11942v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">37 pages, 12 figures, submitted to Journal of Computational Physics</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.11739">arXiv:2411.11739</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.11739">pdf</a>, <a href="https://arxiv.org/format/2411.11739">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> QARM: Quantitative Alignment Multi-Modal Recommendation at Kuaishou </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Luo%2C+X">Xinchen Luo</a>, <a href="/search/cs?searchtype=author&amp;query=Cao%2C+J">Jiangxia Cao</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+T">Tianyu Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+J">Jinkai Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+R">Rui Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Yuan%2C+W">Wei Yuan</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+H">Hezheng Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+Y">Yichen Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+S">Shiyao Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+Q">Qigen Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Qiu%2C+C">Changqing Qiu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+J">Jiaqi Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+X">Xu Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Yan%2C+Z">Zhiheng Yan</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+J">Jingming Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+S">Simin Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Wen%2C+M">Mingxing Wen</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Z">Zhaojie Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Gai%2C+K">Kun Gai</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+G">Guorui Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.11739v1-abstract-short" style="display: inline;"> In recent years, with the significant evolution of multi-modal large models, many recommender researchers realized the potential of multi-modal information for user interest modeling. 
2. arXiv:2411.11739  (cs.IR, cs.AI)
QARM: Quantitative Alignment Multi-Modal Recommendation at Kuaishou
Authors: Xinchen Luo, Jiangxia Cao, Tianyu Sun, Jinkai Yu, Rui Huang, Wei Yuan, Hezheng Lin, Yichen Zheng, Shiyao Wang, Qigen Hu, Changqing Qiu, Jiaqi Zhang, Xu Zhang, Zhiheng Yan, Jingming Zhang, Simin Zhang, Mingxing Wen, Zhaojie Liu, Kun Gai, Guorui Zhou
Abstract: In recent years, with the significant evolution of multi-modal large models, many recommender researchers have realized the potential of multi-modal information for user interest modeling. In industry, a widely used architecture is a cascading paradigm: (1) first pre-train a multi-modal model to provide omnipotent representations for downstream services; (2) the downstream recommendation model then takes the multi-modal representation as additional input to fit real user-item behaviours. Although this paradigm achieves remarkable improvements, two problems still limit model performance: (1) Representation Unmatching: the pre-trained multi-modal model is supervised by classic NLP/CV tasks, while the recommendation model is supervised by real user-item interactions; the goals of these two fundamentally different tasks remain relatively separate, with no consistent objective over their representations. (2) Representation Unlearning: the generated multi-modal representations are stored in a cache and serve as fixed extra input to the recommendation model, so they cannot be updated by the recommendation model's gradients, which is unfriendly to downstream training. Motivated by these two challenges in downstream usage, we introduce a quantitative multi-modal framework to customize specialized and trainable multi-modal information for different downstream models.
Submitted 18 November, 2024; originally announced November 2024.
Comments: Work in progress
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.11061v1-abstract-full').style.display = 'none'; document.getElementById('2411.11061v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09961">arXiv:2411.09961</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.09961">pdf</a>, <a href="https://arxiv.org/format/2411.09961">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Statistics Theory">math.ST</span> </div> </div> <p class="title is-5 mathjax"> Dense ReLU Neural Networks for Temporal-spatial Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Zhi Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Padilla%2C+C+M+M">Carlos Misael Madrid Padilla</a>, <a href="/search/cs?searchtype=author&amp;query=Luo%2C+X">Xiaokai Luo</a>, <a href="/search/cs?searchtype=author&amp;query=Padilla%2C+O+H+M">Oscar Hernan Madrid Padilla</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+D">Daren Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.09961v2-abstract-short" style="display: inline;"> In this paper, we focus on fully connected deep neural networks utilizing the Rectified Linear Unit (ReLU) activation function for nonparametric estimation. We derive non-asymptotic bounds that lead to convergence rates, addressing both temporal and spatial dependence in the observed measurements. By accounting for dependencies across time and space, our models better reflect the complexities of r&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09961v2-abstract-full').style.display = 'inline'; document.getElementById('2411.09961v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.09961v2-abstract-full" style="display: none;"> In this paper, we focus on fully connected deep neural networks utilizing the Rectified Linear Unit (ReLU) activation function for nonparametric estimation. We derive non-asymptotic bounds that lead to convergence rates, addressing both temporal and spatial dependence in the observed measurements. By accounting for dependencies across time and space, our models better reflect the complexities of real-world data, enhancing both predictive performance and theoretical robustness. We also tackle the curse of dimensionality by modeling the data on a manifold, exploring the intrinsic dimensionality of high-dimensional data. We broaden existing theoretical findings of temporal-spatial analysis by applying them to neural networks in more general contexts and demonstrate that our proof techniques are effective for models with short-range dependence. 
4. arXiv:2411.09961  (stat.ML, cs.LG, math.ST)
Dense ReLU Neural Networks for Temporal-spatial Model
Authors: Zhi Zhang, Carlos Misael Madrid Padilla, Xiaokai Luo, Oscar Hernan Madrid Padilla, Daren Wang
Abstract: In this paper, we focus on fully connected deep neural networks utilizing the Rectified Linear Unit (ReLU) activation function for nonparametric estimation. We derive non-asymptotic bounds that lead to convergence rates, addressing both temporal and spatial dependence in the observed measurements. By accounting for dependencies across time and space, our models better reflect the complexities of real-world data, enhancing both predictive performance and theoretical robustness. We also tackle the curse of dimensionality by modeling the data on a manifold, exploring the intrinsic dimensionality of high-dimensional data. We broaden existing theoretical findings of temporal-spatial analysis by applying them to neural networks in more general contexts and demonstrate that our proof techniques are effective for models with short-range dependence. Our empirical simulations across various synthetic response functions underscore the superior performance of our method, outperforming established approaches in the existing literature. These findings provide valuable insights into the strong capabilities of dense neural networks for temporal-spatial modeling across a broad range of function classes.
Submitted 19 November, 2024; v1 submitted 15 November, 2024; originally announced November 2024.
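A toy version of the estimator class analyzed above, assuming nothing beyond a generic fully connected ReLU network fit to observations indexed by (time, location); the paper's contribution is the non-asymptotic theory, not this architecture.

```python
# Generic dense ReLU network fit to synthetic temporal-spatial data.
import torch
import torch.nn as nn

net = nn.Sequential(
    nn.Linear(3, 64), nn.ReLU(),   # input: (t, x1, x2)
    nn.Linear(64, 64), nn.ReLU(),
    nn.Linear(64, 1),              # output: estimated response
)
opt = torch.optim.Adam(net.parameters(), lr=1e-3)
tx = torch.rand(512, 3)                                    # design points
y = torch.sin(4 * tx[:, :1]) + 0.1 * torch.randn(512, 1)   # noisy response
for _ in range(200):
    opt.zero_grad()
    loss = ((net(tx) - y) ** 2).mean()  # least-squares fit
    loss.backward()
    opt.step()
```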
5. arXiv:2411.09148  (cs.NI)
Toward Democratized Generative AI in Next-Generation Mobile Edge Networks
Authors: Ruichen Zhang, Jiayi He, Xiaofeng Luo, Dusit Niyato, Jiawen Kang, Zehui Xiong, Yonghui Li, Biplab Sikdar
Abstract: The rapid development of generative AI technologies, including large language models (LLMs), has brought transformative changes to various fields. However, deploying such advanced models on mobile and edge devices remains challenging due to their high computational, memory, communication, and energy requirements. To address these challenges, we propose a model-centric framework for democratizing generative AI deployment on mobile and edge networks. First, we comprehensively review key compact model strategies, such as quantization, model pruning, and knowledge distillation, and present key performance metrics to optimize generative AI for mobile deployment. Next, we provide a focused review of mobile and edge networks, emphasizing the specific challenges and requirements of these environments. We further conduct a case study demonstrating the effectiveness of these strategies by deploying LLMs on real mobile edge devices. Experimental results highlight the practicality of democratized LLMs, with significant improvements in generalization accuracy, hallucination rate, accessibility, and resource consumption. Finally, we discuss potential research directions to further advance the deployment of generative AI in resource-constrained environments.
Submitted 13 November, 2024; originally announced November 2024.
Comments: 9 pages, 5 figures
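Post-training dynamic quantization is one of the compact-model strategies this survey reviews; a generic PyTorch example on a toy linear model, not the paper's experimental setup:

```python
# Dynamic quantization: weights stored as int8, activations quantized at runtime.
import torch
import torch.nn as nn

model = nn.Sequential(nn.Linear(512, 512), nn.ReLU(), nn.Linear(512, 10))
quantized = torch.quantization.quantize_dynamic(
    model, {nn.Linear}, dtype=torch.qint8
)
x = torch.randn(1, 512)
print(quantized(x).shape)  # torch.Size([1, 10])
```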
6. arXiv:2411.07551  (cs.RO)
SP-VIO: Robust and Efficient Filter-Based Visual Inertial Odometry with State Transformation Model and Pose-Only Visual Description
Authors: Xueyu Du, Chengjun Ji, Lilian Zhang, Xinchan Luo, Huaiyi Zhang, Maosong Wang, Wenqi Wu, Jun Mao
Abstract: Due to the advantages of high computational efficiency and small memory requirements, filter-based visual inertial odometry (VIO) has good application prospects in miniaturized and payload-constrained embedded systems. However, filter-based methods suffer from insufficient accuracy. To this end, we propose the State transformation and Pose-only VIO (SP-VIO), which rebuilds the state and measurement models and further considers visual deprived conditions. In detail, we first propose a system model based on the double state transformation extended Kalman filter (DST-EKF), which has been proven to have better observability and consistency than models based on the extended Kalman filter (EKF) and the state transformation extended Kalman filter (ST-EKF). Secondly, to reduce the influence of linearization error caused by inaccurate 3D reconstruction, we adopt the Pose-only (PO) theory to decouple the measurement model from 3D features. Moreover, to deal with visual deprived conditions, we propose a double state transformation Rauch-Tung-Striebel (DST-RTS) backtracking method to optimize motion trajectories during visual interruption. Experiments on public (EuRoC, TUM-VI, KITTI) and personal datasets show that SP-VIO has better accuracy and efficiency than state-of-the-art (SOTA) VIO algorithms, and better robustness under visual deprived conditions.
Submitted 11 November, 2024; originally announced November 2024.
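To make the filtering vocabulary above concrete, here is the textbook extended-Kalman-filter predict/update step; the paper's DST-EKF rebuilds the state model itself, which this generic skeleton (with an invented 1D constant-velocity demo) does not attempt.

```python
# Generic EKF step: f/h are the motion/measurement models, F/H their Jacobians.
import numpy as np

def ekf_step(x, P, u, z, f, F, h, H, Q, R):
    # predict: propagate state and covariance through the motion model
    x_pred = f(x, u)
    P_pred = F @ P @ F.T + Q
    # update: correct with the measurement z
    y = z - h(x_pred)                       # innovation
    S = H @ P_pred @ H.T + R
    K = P_pred @ H.T @ np.linalg.inv(S)     # Kalman gain
    x_new = x_pred + K @ y
    P_new = (np.eye(len(x)) - K @ H) @ P_pred
    return x_new, P_new

# toy demo: state (position, velocity), measuring position only
dt = 0.1
F = np.array([[1.0, dt], [0.0, 1.0]])
H = np.array([[1.0, 0.0]])
x, P = np.zeros(2), np.eye(2)
x, P = ekf_step(x, P, None, np.array([0.5]),
                f=lambda s, u: F @ s, F=F,
                h=lambda s: H @ s, H=H,
                Q=0.01 * np.eye(2), R=np.array([[0.1]]))
print(x)
```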
7. arXiv:2411.06392  (cs.DB)
LSMGraph: A High-Performance Dynamic Graph Storage System with Multi-Level CSR
Authors: Song Yu, Shufeng Gong, Qian Tao, Sijie Shen, Yanfeng Zhang, Wenyuan Yu, Pengxi Liu, Zhixin Zhang, Hongfu Li, Xiaojian Luo, Ge Yu, Jingren Zhou
Abstract: The growing volume of graph data may exhaust the main memory. It is crucial to design a disk-based graph storage system to ingest updates and analyze graphs efficiently. However, existing dynamic graph storage systems suffer from read or write amplification and face the challenge of optimizing both read and write performance simultaneously. To address this challenge, we propose LSMGraph, a novel dynamic graph storage system that combines the write-friendly LSM-tree and the read-friendly CSR. It leverages the multi-level structure of LSM-trees to optimize write performance while utilizing the compact CSR structures embedded in the LSM-trees to boost read performance. LSMGraph uses a new memory structure, MemGraph, to efficiently cache graph updates and uses a multi-level index to speed up reads within the multi-level structure. Furthermore, LSMGraph incorporates a vertex-grained version control mechanism to mitigate the impact of LSM-tree compaction on read performance and ensure the correctness of concurrent read and write operations. Our evaluation shows that LSMGraph significantly outperforms state-of-the-art (graph) storage systems on both graph update and graph analytical workloads.
Submitted 17 November, 2024; v1 submitted 10 November, 2024; originally announced November 2024.
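The read-friendly half of the design above is compressed sparse row (CSR) storage, which LSMGraph embeds at each LSM-tree level. A toy CSR build; the paper's MemGraph buffer and multi-level index are not modeled here.

```python
# Pack an edge list into CSR: per-vertex offsets plus a flat neighbor array.
def to_csr(num_vertices, edges):
    """edges: iterable of (src, dst) pairs -> (offsets, neighbors)."""
    offsets = [0] * (num_vertices + 1)
    for src, _ in edges:
        offsets[src + 1] += 1
    for v in range(num_vertices):
        offsets[v + 1] += offsets[v]          # prefix sums give row offsets
    neighbors = [0] * len(edges)
    cursor = offsets[:-1].copy()
    for src, dst in edges:
        neighbors[cursor[src]] = dst
        cursor[src] += 1
    return offsets, neighbors

offsets, neighbors = to_csr(4, [(0, 1), (0, 2), (2, 3), (3, 0)])
v = 0
print(neighbors[offsets[v]:offsets[v + 1]])  # neighbors of vertex 0 -> [1, 2]
```

The contiguous neighbor slices are what make scans cache-friendly for reads, while the LSM-tree side absorbs writes; LSMGraph's contribution is combining the two.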
8. arXiv:2411.06111  (cs.RO, cs.AI)
Energy-efficient Hybrid Model Predictive Trajectory Planning for Autonomous Electric Vehicles
Authors: Fan Ding, Xuewen Luo, Gaoxuan Li, Hwa Hui Tew, Junn Yong Loo, Chor Wai Tong, A. S. M Bakibillah, Ziyuan Zhao, Zhiyu Tao
Abstract: To tackle the twin challenges of limited battery life and lengthy charging durations in electric vehicles (EVs), this paper introduces an Energy-efficient Hybrid Model Predictive Planner (EHMPP), which employs an energy-saving optimization strategy. EHMPP refines the design of the motion planner so that it integrates seamlessly with existing automatic driving algorithms, without additional hardware. It has been validated through simulation experiments on the Prescan, CarSim, and Matlab platforms, demonstrating that it can increase passive recovery energy by 11.74% and effectively track motor speed and acceleration at optimal power. In sum, EHMPP not only aids in trajectory planning but also significantly boosts energy efficiency in autonomous EVs.
Submitted 9 November, 2024; originally announced November 2024.
Comments: Accepted at the IEEE International Conference on Systems, Man, and Cybernetics (SMC) 2024
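A stripped-down illustration of the model-predictive flavor of planning described above: pick the acceleration sequence minimizing a combined tracking-plus-energy cost over a short horizon, then apply its first action. The cost weights and the energy proxy are invented for the example; EHMPP's actual formulation, including its passive-recovery modeling, is in the paper.

```python
# Brute-force horizon search over a small discrete action set (toy MPC).
import itertools

def plan(v0, v_ref, horizon=4, dt=0.5, accels=(-2.0, 0.0, 2.0)):
    best_cost, best_seq = float("inf"), None
    for seq in itertools.product(accels, repeat=horizon):
        v, cost = v0, 0.0
        for a in seq:
            v = max(0.0, v + a * dt)
            cost += (v - v_ref) ** 2 + 0.1 * abs(a * v)  # tracking + energy proxy
        if cost < best_cost:
            best_cost, best_seq = cost, seq
    return best_seq

print(plan(v0=10.0, v_ref=15.0))  # apply the first action, then re-plan
```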
id="2411.06087v2-abstract-short" style="display: inline;"> With the advancements of sensor hardware, traffic infrastructure and deep learning architectures, trajectory prediction of vehicles has established a solid foundation in intelligent transportation systems. However, existing solutions are often tailored to specific traffic networks at particular time periods. Consequently, deep learning models trained on one network may struggle to generalize effec&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06087v2-abstract-full').style.display = 'inline'; document.getElementById('2411.06087v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06087v2-abstract-full" style="display: none;"> With the advancements of sensor hardware, traffic infrastructure and deep learning architectures, trajectory prediction of vehicles has established a solid foundation in intelligent transportation systems. However, existing solutions are often tailored to specific traffic networks at particular time periods. Consequently, deep learning models trained on one network may struggle to generalize effectively to unseen networks. To address this, we proposed a novel spatial-temporal trajectory prediction framework that performs cross-domain adaption on the attention representation of a Transformer-based model. A graph convolutional network is also integrated to construct dynamic graph feature embeddings that accurately model the complex spatial-temporal interactions between the multi-agent vehicles across multiple traffic domains. The proposed framework is validated on two case studies involving the cross-city and cross-period settings. Experimental results show that our proposed framework achieves superior trajectory prediction and domain adaptation performances over the state-of-the-art models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06087v2-abstract-full').style.display = 'none'; document.getElementById('2411.06087v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at the IEEE International Conference on Systems, Man, and Cybernetics 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.04997">arXiv:2411.04997</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.04997">pdf</a>, <a href="https://arxiv.org/format/2411.04997">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> LLM2CLIP: Powerful Language Model Unlocks Richer Visual Representation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Huang%2C+W">Weiquan Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+A">Aoqi Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+Y">Yifan Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Luo%2C+X">Xufang Luo</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+Y">Yuqing Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+L">Liang Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Dai%2C+Q">Qi Dai</a>, <a href="/search/cs?searchtype=author&amp;query=Dai%2C+X">Xiyang Dai</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+D">Dongdong Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Luo%2C+C">Chong Luo</a>, <a href="/search/cs?searchtype=author&amp;query=Qiu%2C+L">Lili Qiu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.04997v2-abstract-short" style="display: inline;"> CLIP is one of the most important multimodal foundational models today. What powers CLIP&#39;s capabilities? The rich supervision signals provided by natural language, the carrier of human knowledge, shape a powerful cross-modal representation space. However, with the rapid advancements in large language models LLMs like GPT-4 and LLaMA, the boundaries of language comprehension and generation are cont&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04997v2-abstract-full').style.display = 'inline'; document.getElementById('2411.04997v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.04997v2-abstract-full" style="display: none;"> CLIP is one of the most important multimodal foundational models today. What powers CLIP&#39;s capabilities? The rich supervision signals provided by natural language, the carrier of human knowledge, shape a powerful cross-modal representation space. However, with the rapid advancements in large language models LLMs like GPT-4 and LLaMA, the boundaries of language comprehension and generation are continually being pushed. This raises an intriguing question: can the capabilities of LLMs be harnessed to further improve multimodal representation learning? The potential benefits of incorporating LLMs into CLIP are clear. 
10. arXiv:2411.04997  (cs.CV, cs.CL)
LLM2CLIP: Powerful Language Model Unlocks Richer Visual Representation
Authors: Weiquan Huang, Aoqi Wu, Yifan Yang, Xufang Luo, Yuqing Yang, Liang Hu, Qi Dai, Xiyang Dai, Dongdong Chen, Chong Luo, Lili Qiu
Abstract: CLIP is one of the most important multimodal foundational models today. What powers CLIP's capabilities? The rich supervision signals provided by natural language, the carrier of human knowledge, shape a powerful cross-modal representation space. However, with the rapid advancements in large language models (LLMs) like GPT-4 and LLaMA, the boundaries of language comprehension and generation are continually being pushed. This raises an intriguing question: can the capabilities of LLMs be harnessed to further improve multimodal representation learning? The potential benefits of incorporating LLMs into CLIP are clear. LLMs' strong textual understanding can fundamentally improve CLIP's ability to handle image captions, drastically enhancing its ability to process long and complex texts, a well-known limitation of vanilla CLIP. Moreover, LLMs are trained on a vast corpus of text, possessing open-world knowledge. This allows them to expand on caption information during training, increasing the efficiency of the learning process. In this paper, we propose LLM2CLIP, a novel approach that embraces the power of LLMs to unlock CLIP's potential. By fine-tuning the LLM in the caption space with contrastive learning, we extract its textual capabilities into the output embeddings, significantly improving the output layer's textual discriminability. We then design an efficient training process where the fine-tuned LLM acts as a powerful teacher for CLIP's visual encoder. Thanks to the LLM's presence, we can now incorporate longer and more complex captions without being restricted by the context window and capability limitations of vanilla CLIP's text encoder. Our experiments demonstrate that this approach brings substantial improvements in cross-modal tasks.
Submitted 13 November, 2024; v1 submitted 7 November, 2024; originally announced November 2024.
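The symmetric contrastive (InfoNCE-style) objective underlying CLIP-like training, which LLM2CLIP applies with LLM-derived caption embeddings on the text side. Encoders are stubbed with random tensors here; only the loss is shown.

```python
# Symmetric image-text contrastive loss: matching pairs sit on the diagonal.
import torch
import torch.nn.functional as F

def clip_loss(img_emb, txt_emb, temperature=0.07):
    img = F.normalize(img_emb, dim=-1)
    txt = F.normalize(txt_emb, dim=-1)
    logits = img @ txt.t() / temperature       # (B, B) similarity matrix
    labels = torch.arange(img.size(0))         # i-th image matches i-th caption
    return 0.5 * (F.cross_entropy(logits, labels) +
                  F.cross_entropy(logits.t(), labels))

loss = clip_loss(torch.randn(16, 512), torch.randn(16, 512))
print(loss.item())
```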
arXiv:2411.01600 [pdf, other] cs.LG physics.chem-ph q-bio.QM
Graph Fourier Neural ODEs: Bridging Spatial and Temporal Multiscales in Molecular Dynamics
Authors: Fang Sun, Zijie Huang, Haixin Wang, Yadi Cao, Xiao Luo, Wei Wang, Yizhou Sun
Abstract: Molecular dynamics simulations are crucial for understanding complex physical, chemical, and biological processes at the atomic level. However, accurately capturing interactions across multiple spatial and temporal scales remains a significant challenge. We present a novel framework that jointly models spatial and temporal multiscale interactions in molecular dynamics. Our approach leverages Graph Fourier Transforms to decompose molecular structures into different spatial scales and employs Neural Ordinary Differential Equations to model the temporal dynamics in a curated manner influenced by the spatial modes. This unified framework links spatial structures with temporal evolution in a flexible manner, enabling more accurate and comprehensive simulations of molecular systems. We evaluate our model on the MD17 dataset, demonstrating consistent performance improvements over state-of-the-art baselines across multiple molecules, particularly under challenging conditions such as irregular timestep sampling and long-term prediction horizons. Ablation studies confirm the significant contributions of both spatial and temporal multiscale modeling components. Our method advances the simulation of complex molecular systems, potentially accelerating research in computational chemistry, drug discovery, and materials science.
Submitted 3 November, 2024; originally announced November 2024.
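The spatial half of this design rests on the graph Fourier transform: project a node signal onto the Laplacian eigenbasis and split it by frequency. A small self-contained sketch of that split (the low/high partition is illustrative; the paper couples these modes to a Neural ODE):

```python
import numpy as np

def graph_fourier_split(adj, signal, k):
    """Split a node signal into smooth (low-frequency) and fine (high-frequency)
    components using the eigenbasis of the graph Laplacian."""
    lap = np.diag(adj.sum(axis=1)) - adj          # combinatorial Laplacian L = D - A
    _, basis = np.linalg.eigh(lap)                # columns = graph Fourier modes
    coeffs = basis.T @ signal                     # graph Fourier transform
    low = basis[:, :k] @ coeffs[:k]               # large-scale structure
    return low, signal - low                      # fine-scale remainder

# 4-node path graph with a toy scalar signal per node
adj = np.array([[0, 1, 0, 0], [1, 0, 1, 0], [0, 1, 0, 1], [0, 0, 1, 0]], float)
low, high = graph_fourier_split(adj, np.array([1.0, 3.0, 2.0, 4.0]), k=2)
```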
arXiv:2411.01431 [pdf, other] cs.LG cs.AI
Efficient Deep Learning Infrastructures for Embedded Computing Systems: A Comprehensive Survey and Future Envision
Authors: Xiangzhong Luo, Di Liu, Hao Kong, Shuo Huai, Hui Chen, Guochu Xiong, Weichen Liu
Abstract: Deep neural networks (DNNs) have recently achieved impressive success across a wide range of real-world vision and language processing tasks, spanning from image classification to many other downstream vision tasks, such as object detection, tracking, and segmentation. However, previous well-established DNNs, despite being able to maintain superior accuracy, have also been evolving to be deeper and wider, and thus inevitably necessitate prohibitive computational resources for both training and inference. This trend further enlarges the computational gap between computation-intensive DNNs and resource-constrained embedded computing systems, making it challenging to deploy powerful DNNs in real-world embedded computing systems towards ubiquitous embedded intelligence. To alleviate this computational gap and enable ubiquitous embedded intelligence, this survey focuses on recent efficient deep learning infrastructures for embedded computing systems, spanning from training to inference, from manual to automated, from convolutional neural networks to transformers, from transformers to vision transformers, from vision models to large language models, from software to hardware, and from algorithms to applications. Specifically, we discuss recent efficient deep learning infrastructures for embedded computing systems from the lens of (1) efficient manual network design, (2) efficient automated network design, (3) efficient network compression, (4) efficient on-device learning, (5) efficient large language models, (6) efficient deep learning software and hardware, and (7) efficient intelligent applications, all for embedded computing systems.
Submitted 2 November, 2024; originally announced November 2024.
Comments: ACM Transactions on Embedded Computing Systems (TECS) 2024
arXiv:2411.00845 [pdf] cs.LG cs.AI cs.CY
End-to-end Graph Learning Approach for Cognitive Diagnosis of Student Tutorial
Authors: Fulai Yang, Di Wu, Yi He, Li Tao, Xin Luo
Abstract: Cognitive diagnosis (CD) utilizes students' existing studying records to estimate their mastery of unknown knowledge concepts, which is vital for evaluating their learning abilities. Accurate CD is extremely challenging because CD is associated with complex relationships and mechanisms among students, knowledge concepts, studying records, etc. However, existing approaches loosely consider these relationships and mechanisms through a non-end-to-end learning framework, resulting in sub-optimal feature extraction and fusion for CD. Different from them, this paper proposes an End-to-end Graph Neural Networks-based Cognitive Diagnosis (EGNN-CD) model. EGNN-CD consists of three main parts: a knowledge concept network (KCN), graph neural networks-based feature extraction (GNNFE), and cognitive ability prediction (CAP). First, KCN constructs CD-related interactions by comprehensively extracting physical information from students, exercises, and knowledge concepts. Second, a four-channel GNNFE is designed to extract high-order and individual features from the constructed KCN. Finally, CAP employs a multi-layer perceptron to fuse the extracted features and predict students' learning abilities in an end-to-end manner. With such designs, the feature extraction and fusion are guaranteed to be comprehensive and optimal for CD. Extensive experiments on three real datasets demonstrate that our EGNN-CD achieves significantly higher accuracy than state-of-the-art models in CD.
Submitted 30 October, 2024; originally announced November 2024.
arXiv:2410.16624 [pdf, ps, other] cs.CV cs.AI
EVC-MF: End-to-end Video Captioning Network with Multi-scale Features
Authors: Tian-Zi Niu, Zhen-Duo Chen, Xin Luo, Xin-Shun Xu
Abstract: Conventional approaches for video captioning leverage a variety of offline-extracted features to generate captions. Despite the availability of various offline feature extractors that offer diverse information from different perspectives, they have several limitations due to fixed parameters. Concretely, these extractors are solely pre-trained on image/video comprehension tasks, making them less adaptable to video caption datasets. Additionally, most of these extractors only capture features prior to the classifier of the pre-training task, ignoring a significant amount of valuable shallow information. Furthermore, employing multiple offline features may introduce redundant information. To address these issues, we propose an end-to-end encoder-decoder-based network (EVC-MF) for video captioning, which efficiently utilizes multi-scale visual and textual features to generate video descriptions. Specifically, EVC-MF consists of three modules. First, instead of relying on multiple feature extractors, we directly feed video frames into a transformer-based network to obtain multi-scale visual features and update feature extractor parameters. Second, we fuse the multi-scale features and input them into a masked encoder to reduce redundancy and encourage learning useful features. Finally, we utilize an enhanced transformer-based decoder, which can efficiently leverage shallow textual information, to generate video descriptions. To evaluate our proposed model, we conduct extensive experiments on benchmark datasets. The results demonstrate that EVC-MF yields competitive performance compared with state-of-the-art methods.
Submitted 21 October, 2024; originally announced October 2024.
arXiv:2410.16606 [pdf, other] cs.LG cs.AI
DOI: 10.1109/TPAMI.2024.3416372
GALA: Graph Diffusion-based Alignment with Jigsaw for Source-free Domain Adaptation
Authors: Junyu Luo, Yiyang Gu, Xiao Luo, Wei Ju, Zhiping Xiao, Yusheng Zhao, Jingyang Yuan, Ming Zhang
Abstract: Source-free domain adaptation is a crucial machine learning topic, as it has numerous applications in the real world, particularly with respect to data privacy. Existing approaches predominantly focus on Euclidean data, such as images and videos, while the exploration of non-Euclidean graph data remains scarce. Recent graph neural network (GNN) approaches can suffer from serious performance decline due to domain shift and label scarcity in source-free adaptation scenarios. In this study, we propose a novel method named Graph Diffusion-based Alignment with Jigsaw (GALA), tailored for source-free graph domain adaptation. To achieve domain alignment, GALA employs a graph diffusion model to reconstruct source-style graphs from target data. Specifically, a score-based graph diffusion model is trained using source graphs to learn the generative source styles. Then, we introduce perturbations to target graphs via a stochastic differential equation instead of sampling from a prior, followed by the reverse process to reconstruct source-style graphs. We feed the source-style graphs into an off-the-shelf GNN and introduce class-specific thresholds with curriculum learning, which can generate accurate and unbiased pseudo-labels for target graphs. Moreover, we develop a simple yet effective graph-mixing strategy named graph jigsaw to combine confident graphs and unconfident graphs, which can enhance generalization capabilities and robustness via consistency learning. Extensive experiments on benchmark datasets validate the effectiveness of GALA.
Submitted 21 October, 2024; originally announced October 2024.
Comments: IEEE TPAMI
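The pseudo-labeling step (class-specific thresholds plus curriculum learning) can be sketched independently of the diffusion machinery. A hedged toy version in which per-class confidence quantiles stand in for the paper's thresholds, and the kept fraction grows as training progresses:

```python
import numpy as np

def curriculum_pseudo_labels(probs, progress):
    """Keep per-class top-confidence predictions; the kept fraction grows with
    training progress in [0, 1] (strict early, permissive late)."""
    preds, conf = probs.argmax(axis=1), probs.max(axis=1)
    keep = np.zeros(len(probs), dtype=bool)
    frac = 0.2 + 0.6 * progress                       # fraction kept per class
    for c in range(probs.shape[1]):
        mask = preds == c
        if mask.any():
            tau_c = np.quantile(conf[mask], 1.0 - frac)  # class-specific threshold
            keep |= mask & (conf >= tau_c)
    return preds, keep

rng = np.random.default_rng(0)
p = rng.dirichlet(np.ones(3), size=100)               # mock target-graph predictions
labels, selected = curriculum_pseudo_labels(p, progress=0.5)
```

Because each class keeps its own top slice, rare or hard classes still contribute pseudo-labels instead of being crowded out by one global threshold.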
arXiv:2410.14745 [pdf, other] cs.CL cs.AI
SemiEvol: Semi-supervised Fine-tuning for LLM Adaptation
Authors: Junyu Luo, Xiao Luo, Xiusi Chen, Zhiping Xiao, Wei Ju, Ming Zhang
Abstract: Supervised fine-tuning (SFT) is crucial in adapting large language models (LLMs) to a specific domain or task. However, only a limited amount of labeled data is available in practical applications, which poses a severe challenge for SFT in yielding satisfactory results. Therefore, a data-efficient framework that can fully exploit labeled and unlabeled data for LLM fine-tuning is highly anticipated. Towards this end, we introduce a semi-supervised fine-tuning framework named SemiEvol for LLM adaptation in a propagate-and-select manner. For knowledge propagation, SemiEvol adopts a bi-level approach, propagating knowledge from labeled data to unlabeled data through both in-weight and in-context methods. For knowledge selection, SemiEvol incorporates a collaborative learning mechanism, selecting higher-quality pseudo-response samples. We conducted experiments using GPT-4o-mini and Llama-3.1 on seven general or domain-specific datasets, demonstrating significant improvements in model performance on target data. Furthermore, we compared SemiEvol with SFT and self-evolution methods, highlighting its practicality in hybrid data scenarios.
Submitted 17 October, 2024; originally announced October 2024.
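One plausible reading of the knowledge-selection step is an agreement-and-confidence filter over candidate pseudo-responses produced by collaborating model configurations. A toy sketch under that assumption (not the paper's actual mechanism):

```python
def select_pseudo_responses(candidate_sets, min_votes=2, min_conf=0.7):
    """candidate_sets: per prompt, a list of (answer, confidence) pairs from
    collaborating model configurations; keep only agreeing, confident answers."""
    kept = []
    for cands in candidate_sets:
        answers = [a for a, _ in cands]
        best = max(set(answers), key=answers.count)       # plurality answer
        votes = answers.count(best)
        conf = sum(c for a, c in cands if a == best) / votes
        kept.append(best if votes >= min_votes and conf >= min_conf else None)
    return kept

demo = [[("Paris", 0.9), ("Paris", 0.8), ("Lyon", 0.4)],
        [("42", 0.5), ("41", 0.6), ("40", 0.4)]]
print(select_pseudo_responses(demo))   # ['Paris', None]
```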
arXiv:2410.12210 [pdf, other] cs.CR
DOI: 10.1145/3658644.3690243
fAmulet: Finding Finalization Failure Bugs in Polygon zkRollup
Authors: Zihao Li, Xinghao Peng, Zheyuan He, Xiapu Luo, Ting Chen
Abstract: Zero-knowledge layer 2 protocols emerge as a compelling approach to overcoming blockchain scalability issues by processing transactions through the transaction finalization process. During this process, transactions are efficiently processed off the main chain. Besides, both the transaction data and the zero-knowledge proofs of transaction executions are reserved on the main chain, ensuring the availability of transaction data as well as the correctness and verifiability of transaction executions. Hence, any bugs that cause transaction finalization failure are crucial, as they impair the usability of these protocols and the scalability of blockchains. In this work, we conduct the first systematic study on finalization failure bugs in zero-knowledge layer 2 protocols, and define two kinds of such bugs. Besides, we design fAmulet, the first tool to detect finalization failure bugs in Polygon zkRollup, a prominent zero-knowledge layer 2 protocol, by leveraging fuzz testing. To trigger finalization failure bugs effectively, we introduce a finalization behavior model to guide our transaction fuzzer to generate and mutate transactions that induce diverse behaviors across each component (e.g., Sequencer) in the finalization process. Moreover, we define bug oracles according to the distinct bug definitions to accurately detect bugs. Through our evaluation, fAmulet can uncover twelve zero-day finalization failure bugs in Polygon zkRollup, and cover at least 20.8% more branches than baselines. Furthermore, through our preliminary study, fAmulet uncovers a zero-day finalization failure bug in Scroll zkRollup, highlighting the generality of fAmulet for application to other zero-knowledge layer 2 protocols. At the time of writing, all our uncovered bugs have been confirmed and fixed by the Polygon zkRollup and Scroll zkRollup teams.
Submitted 16 October, 2024; originally announced October 2024.
Comments: This submission serves as our full paper version with the appendix
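The fuzzing loop implied by the abstract (mutate transactions, drive the finalization pipeline, check traces against bug oracles) has a compact skeleton. In this sketch, `run_finalization` and `bug_oracle` are hypothetical stand-ins for protocol-specific hooks, not fAmulet's API:

```python
import random

def mutate(tx):
    """Flip one field of a transaction dict; a real mutator would target the
    fields the finalization behavior model marks as influential."""
    tx = dict(tx)
    key = random.choice(list(tx))
    val = tx[key]
    tx[key] = val ^ random.getrandbits(8) if isinstance(val, int) else val[::-1]
    return tx

def fuzz(seeds, run_finalization, bug_oracle, rounds=1000):
    """run_finalization drives the off-chain pipeline (e.g., the Sequencer) and
    returns an execution trace; bug_oracle flags finalization failures in it."""
    findings = []
    for _ in range(rounds):
        tx = mutate(random.choice(seeds))
        trace = run_finalization(tx)
        if bug_oracle(trace):
            findings.append((tx, trace))
    return findings
```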
arXiv:2410.11708 [pdf, other] cs.CR
DOI: 10.1145/3646547.3688451
The Age of DDoScovery: An Empirical Comparison of Industry and Academic DDoS Assessments
Authors: Raphael Hiesgen, Marcin Nawrocki, Marinho Barcellos, Daniel Kopp, Oliver Hohlfeld, Echo Chan, Roland Dobbins, Christian Doerr, Christian Rossow, Daniel R. Thomas, Mattijs Jonker, Ricky Mok, Xiapu Luo, John Kristoff, Thomas C. Schmidt, Matthias Wählisch, kc claffy
Abstract: Motivated by the impressive but diffuse scope of DDoS research and reporting, we undertake a multistakeholder (joint industry-academic) analysis to seek convergence across the best available macroscopic views of the relative trends in two dominant classes of attacks: direct-path attacks and reflection-amplification attacks. We first analyze 24 industry reports to extract trends and (in)consistencies across observations by commercial stakeholders in 2022. We then analyze ten data sets spanning industry and academic sources, across four years (2019-2023), to find and explain discrepancies based on data sources, vantage points, methods, and parameters. Our method includes a new approach: we share an aggregated list of DDoS targets with industry players who return the results of joining this list with their proprietary data sources to reveal gaps in visibility of the academic data sources. We use academic data sources to explore an industry-reported relative drop in spoofed reflection-amplification attacks in 2021-2022. Our study illustrates the value, but also the challenge, in independent validation of security-related properties of Internet infrastructure. Finally, we reflect on opportunities to facilitate greater common understanding of the DDoS landscape. We hope our results inform not only future academic and industry pursuits but also emerging policy efforts to reduce systemic Internet security vulnerabilities.
Submitted 21 October, 2024; v1 submitted 15 October, 2024; originally announced October 2024.
Comments: camera-ready
Journal ref: Proceedings of ACM Internet Measurement Conference (IMC), 2024

arXiv:2410.10244 [pdf, other] cs.CV
Capture Artifacts via Progressive Disentangling and Purifying Blended Identities for Deepfake Detection
Authors: Weijie Zhou, Xiaoqing Luo, Zhancheng Zhang, Jiachen He, Xiaojun Wu
Abstract: Deepfake technology has raised serious concerns regarding privacy breaches and trust issues. To tackle these challenges, Deepfake detection technology has emerged. Current methods over-rely on the global feature space, which contains redundant information independent of the artifacts. As a result, existing Deepfake detection techniques suffer performance degradation when encountering unknown datasets. To reduce information redundancy, current methods use disentanglement techniques to roughly separate fake faces into artifacts and content information. However, these methods lack a solid disentanglement foundation and cannot guarantee the reliability of their disentangling process. To address these issues, this paper proposes a Deepfake detection method based on progressively disentangling and purifying blended identities. Based on the artifact generation mechanism, coarse- and fine-grained strategies are combined to ensure the reliability of the disentanglement method. Our method aims to more accurately capture and separate artifact features in fake faces. Specifically, we first perform coarse-grained disentangling on fake faces to obtain a pair of blended identities that require no additional annotation to distinguish between the source face and the target face. Then, the artifact features from each identity are separated to achieve fine-grained disentanglement. To obtain pure identity information and artifacts, an Identity-Artifact Correlation Compression module (IACC) is designed based on the information bottleneck theory, effectively reducing the potential correlation between identity information and artifacts. Additionally, an Identity-Artifact Separation Contrast Loss is designed to enhance the independence of artifact features post-disentangling. Finally, the classifier focuses only on pure artifact features to achieve a generalized Deepfake detector.
Submitted 15 October, 2024; v1 submitted 14 October, 2024; originally announced October 2024.
Comments: TCSVT (under review)
arXiv:2410.06366 [pdf, other] cs.LG cs.AI
Physics-Informed Regularization for Domain-Agnostic Dynamical System Modeling
Authors: Zijie Huang, Wanjia Zhao, Jingdong Gao, Ziniu Hu, Xiao Luo, Yadi Cao, Yuanzhou Chen, Yizhou Sun, Wei Wang
Abstract: Learning complex physical dynamics purely from data is challenging due to the intrinsic properties of systems to be satisfied. Incorporating physics-informed priors, such as in Hamiltonian Neural Networks (HNNs), achieves high-precision modeling for energy-conservative systems. However, real-world systems often deviate from strict energy conservation and follow different physical priors. To address this, we present a framework that achieves high-precision modeling for a wide range of dynamical systems from the numerical aspect, by enforcing Time-Reversal Symmetry (TRS) via a novel regularization term. It helps preserve energies for conservative systems while serving as a strong inductive bias for non-conservative, reversible systems. While TRS is a domain-specific physical prior, we present the first theoretical proof that TRS loss can universally improve modeling accuracy by minimizing higher-order Taylor terms in ODE integration, which is numerically beneficial to various systems regardless of their properties, even for irreversible systems. By integrating the TRS loss within neural ordinary differential equation models, the proposed model TREAT demonstrates superior performance on diverse physical systems. It achieves a significant 11.5% MSE improvement in a challenging chaotic triple-pendulum scenario, underscoring TREAT's broad applicability and effectiveness.
Submitted 8 October, 2024; originally announced October 2024.
Comments: Accepted to The Thirty-eighth Annual Conference on Neural Information Processing Systems (NeurIPS 2024)
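The TRS regularizer has a simple operational form: integrate forward, integrate back from the end state, and penalize the mismatch. A numpy sketch with a toy Euler-integrated oscillator (the paper applies this inside neural ODE models; names here are illustrative):

```python
import numpy as np

def trs_loss(step, x0, n_steps, dt):
    """Roll a trajectory forward, then integrate back from the end state with
    negated dt; a time-reversible system should retrace itself, so the mean
    squared mismatch acts as the regularization penalty."""
    fwd = [x0]
    for _ in range(n_steps):
        fwd.append(step(fwd[-1], dt))
    back = [fwd[-1]]
    for _ in range(n_steps):
        back.append(step(back[-1], -dt))
    back.reverse()
    return float(np.mean([(f - b) ** 2 for f, b in zip(fwd, back)]))

def euler_oscillator(x, dt):              # toy dynamics: x = (position, velocity)
    pos, vel = x
    return np.array([pos + dt * vel, vel - dt * pos])

print(trs_loss(euler_oscillator, np.array([1.0, 0.0]), n_steps=50, dt=0.05))
```

The penalty is nonzero here because forward Euler is not exactly reversible; minimizing it pushes a learned model toward trajectories whose forward and reversed rollouts agree.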
arXiv:2410.00363 [pdf, other] cs.CL
Unleashing the Potentials of Likelihood Composition for Multi-modal Language Models
Authors: Shitian Zhao, Renrui Zhang, Xu Luo, Yan Wang, Shanghang Zhang, Peng Gao
Abstract: Model fusing has always been an important topic, especially in an era where large language models (LLMs) and multi-modal language models (MLMs) with different architectures, parameter sizes, and training pipelines are being created all the time. In this work, we propose a post-hoc framework aimed at fusing heterogeneous models off-the-shelf, which we call likelihood composition. The basic idea is to compose multiple models' likelihood distributions when doing a multi-choice visual-question-answering task; here the core concept, likelihood, is the log-probability of the candidate answer. In likelihood composition, we introduce some basic operations: debias, highlight, majority-vote, and ensemble. By combining (composing) these basic elements, we get the mixed composition methods: mix-composition. Through comprehensive experiments on 9 VQA datasets and 10 MLMs, we demonstrate the effectiveness of mix-composition compared with simple ensemble or majority-vote methods. In this framework, people can propose new basic composition methods and combine them to get new mixed composition methods. We hope our proposed likelihood composition can provide a new perspective on fusing heterogeneous models and inspire exploration under this framework.
Submitted 30 September, 2024; originally announced October 2024.
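The basic operations are straightforward to state over a (models x questions x options) tensor of candidate-answer log-probabilities. A numpy sketch of debias, ensemble, and majority-vote under that assumed layout:

```python
import numpy as np

def debias(logp):
    """Remove each option's average log-likelihood over questions (option bias)."""
    return logp - logp.mean(axis=0, keepdims=True)

def ensemble(logps):
    """Average the composed likelihoods of several models."""
    return np.mean(logps, axis=0)

def majority_vote(logps):
    """Each model votes for its argmax option; return the per-question winner."""
    votes = np.argmax(logps, axis=2)                          # (models, questions)
    n_opt = logps.shape[2]
    counts = np.apply_along_axis(
        lambda v: np.bincount(v, minlength=n_opt), 0, votes)  # (options, questions)
    return counts.argmax(axis=0)

rng = np.random.default_rng(0)
logps = rng.normal(size=(10, 5, 4))          # 10 models, 5 questions, 4 options
fused = ensemble(np.stack([debias(m) for m in logps]))
winners = majority_vote(logps)
```

Composing these primitives, e.g. debiasing every model before ensembling, is a minimal instance of the mix-composition idea.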
arXiv:2409.19720 [pdf, other] cs.CV
FAST: A Dual-tier Few-Shot Learning Paradigm for Whole Slide Image Classification
Authors: Kexue Fu, Xiaoyuan Luo, Linhao Qu, Shuo Wang, Ying Xiong, Ilias Maglogiannis, Longxiang Gao, Manning Wang
Abstract: Expensive fine-grained annotation and data scarcity have become the primary obstacles to the widespread adoption of deep learning-based Whole Slide Image (WSI) classification algorithms in clinical practice. Unlike few-shot learning methods in natural images that can leverage the labels of each image, existing few-shot WSI classification methods only utilize a small number of fine-grained labels or weakly supervised slide labels for training, in order to avoid expensive fine-grained annotation. They lack sufficient mining of available WSIs, severely limiting WSI classification performance. To address these issues, we propose a novel and efficient dual-tier few-shot learning paradigm for WSI classification, named FAST. FAST consists of a dual-level annotation strategy and a dual-branch classification framework. First, to avoid expensive fine-grained annotation, we collect a very small number of WSIs at the slide level and annotate an extremely small number of patches. Then, to fully mine the available WSIs, we use all the patches and available patch labels to build a cache branch, which uses the labeled patches to infer the labels of unlabeled patches and performs patch classification through knowledge retrieval. In addition to the cache branch, we also construct a prior branch that includes learnable prompt vectors, using the text encoder of visual-language models for patch classification. Finally, we integrate the results from both branches to achieve WSI classification. Extensive experiments on binary and multi-class datasets demonstrate that our proposed method significantly surpasses existing few-shot classification methods and approaches the accuracy of fully supervised methods with only 0.22% annotation costs. All codes and models will be publicly available at https://github.com/fukexue/FAST.
Submitted 29 September, 2024; originally announced September 2024.
Comments: Accepted to NeurIPS 2024
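The cache branch reads naturally as similarity-weighted retrieval over labeled patch features. A toy sketch of such a kNN cache (the exponential weighting is an assumption, not the paper's exact formulation):

```python
import numpy as np

def cache_predict(query, keys, labels_onehot, k=8, beta=5.0):
    """Similarity-weighted vote of the k nearest labeled patch features."""
    q = query / np.linalg.norm(query)
    ks = keys / np.linalg.norm(keys, axis=1, keepdims=True)
    sim = ks @ q                                  # cosine similarity to the cache
    top = np.argsort(sim)[-k:]                    # k nearest labeled patches
    w = np.exp(beta * sim[top])                   # temperature-sharpened weights
    return (w[:, None] * labels_onehot[top]).sum(axis=0) / w.sum()

rng = np.random.default_rng(0)
keys = rng.normal(size=(200, 64))                 # cached patch features
labels = np.eye(2)[rng.integers(0, 2, size=200)]  # one-hot patch labels
probs = cache_predict(rng.normal(size=64), keys, labels)
```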
arXiv:2409.18986 [pdf, other] cs.CL cs.AI cs.IR
Lab-AI -- Retrieval-Augmented Language Model for Personalized Lab Test Interpretation in Clinical Medicine
Authors: Xiaoyu Wang, Haoyong Ouyang, Balu Bhasuran, Xiao Luo, Karim Hanna, Mia Liza A. Lustria, Zhe He
Abstract: Accurate interpretation of lab results is crucial in clinical medicine, yet most patient portals use universal normal ranges, ignoring factors like age and gender. This study introduces Lab-AI, an interactive system that offers personalized normal ranges using Retrieval-Augmented Generation (RAG) from credible health sources. Lab-AI has two modules: factor retrieval and normal range retrieval. We tested these on 68 lab tests: 30 with conditional factors and 38 without. For tests with factors, normal ranges depend on patient-specific information. Our results show that GPT-4-turbo with RAG achieved a 0.95 F1 score for factor retrieval and 0.993 accuracy for normal range retrieval. GPT-4-turbo with RAG outperformed the best non-RAG system by 29.1% in factor retrieval and showed 60.9% and 52.9% improvements in question-level and lab-level performance, respectively, for normal range retrieval. These findings highlight Lab-AI's potential to enhance patient understanding of lab results.
Submitted 16 September, 2024; originally announced September 2024.
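The two-module split (factor retrieval, then normal range retrieval) can be pictured as a conditional lookup once the factors governing a test are known. A toy sketch with a hypothetical knowledge base and illustrative reference values; the actual system retrieves this information from credible health sources via RAG rather than a hard-coded table:

```python
def normal_range(test, patient, kb):
    """Factor retrieval then range retrieval: find which patient factors
    condition the test, then index the range table with those factor values."""
    factors = kb["factors"].get(test, [])            # e.g. ["sex"] or ["age", "sex"]
    key = (test,) + tuple(patient[f] for f in factors)
    return kb["ranges"][key]

kb = {  # hypothetical toy knowledge base with illustrative values
    "factors": {"hemoglobin": ["sex"], "glucose": []},
    "ranges": {("hemoglobin", "female"): (12.0, 15.5),
               ("hemoglobin", "male"): (13.5, 17.5),
               ("glucose",): (70, 99)},
}
print(normal_range("hemoglobin", {"sex": "female"}, kb))   # (12.0, 15.5)
```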

arXiv:2409.17367 (https://arxiv.org/abs/2409.17367) [cs.LG, cs.CV]
DOI: 10.1109/MLSP58920.2024.10734742
Implicit Neural Representations for Simultaneous Reduction and Continuous Reconstruction of Multi-Altitude Climate Data
Authors: Alif Bin Abdul Qayyum, Xihaier Luo, Nathan M. Urban, Xiaoning Qian, Byung-Jun Yoon
Abstract: The world is moving towards clean and renewable energy sources, such as wind energy, in an attempt to reduce greenhouse gas emissions that contribute to global warming. To enhance the analysis and storage of wind data, we introduce a deep learning framework designed to simultaneously enable effective dimensionality reduction and continuous representation of multi-altitude wind data from discrete observations. The framework consists of three key components: dimensionality reduction, cross-modal prediction, and super-resolution. We aim to: (1) improve data resolution across diverse climatic conditions to recover high-resolution details; (2) reduce data dimensionality for more efficient storage of large climate datasets; and (3) enable cross-prediction between wind data measured at different heights. Comprehensive testing confirms that our approach surpasses existing methods in both super-resolution quality and compression efficiency.
Submitted 25 September, 2024; originally announced September 2024.
Comments: arXiv admin note: text overlap with arXiv:2401.16936
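
As background on the core idea, here is a minimal coordinate-network sketch, not the paper's architecture: a small MLP maps continuous (x, y, altitude) coordinates to wind speed, so one compact set of weights stands in for gridded multi-altitude data and can be queried at arbitrary resolution. Network size, training loop, and data are invented.

```python
# Minimal implicit-neural-representation sketch for wind fields (illustrative).
import torch
import torch.nn as nn

class WindINR(nn.Module):
    def __init__(self, hidden: int = 64):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(3, hidden), nn.ReLU(),
            nn.Linear(hidden, hidden), nn.ReLU(),
            nn.Linear(hidden, 1),          # wind speed at (x, y, altitude)
        )

    def forward(self, coords: torch.Tensor) -> torch.Tensor:
        return self.net(coords)

model = WindINR()
coords = torch.rand(1024, 3)               # normalized x, y, altitude samples
speeds = torch.rand(1024, 1)               # stand-in observed wind speeds
opt = torch.optim.Adam(model.parameters(), lr=1e-3)
for _ in range(200):                       # fit the network to the observations
    opt.zero_grad()
    loss = nn.functional.mse_loss(model(coords), speeds)
    loss.backward()
    opt.step()
# Continuous reconstruction: evaluate between grid points and altitudes.
print(model(torch.tensor([[0.5, 0.5, 0.33]])).item())
```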

arXiv:2409.15695 (https://arxiv.org/abs/2409.15695) [cs.NI, cs.AI, cs.CR]
Toward Mixture-of-Experts Enabled Trustworthy Semantic Communication for 6G Networks
Authors: Jiayi He, Xiaofeng Luo, Jiawen Kang, Hongyang Du, Zehui Xiong, Ci Chen, Dusit Niyato, Xuemin Shen
Abstract: Semantic Communication (SemCom) plays a pivotal role in 6G networks, offering a viable solution for future efficient communication. Deep Learning (DL)-based semantic codecs further enhance this efficiency. However, the vulnerability of DL models to security threats, such as adversarial attacks, poses significant challenges for practical applications of SemCom systems. These vulnerabilities enable attackers to tamper with messages and eavesdrop on private information, especially in wireless communication scenarios. Although existing defenses attempt to address specific threats, they often fail to simultaneously handle multiple heterogeneous attacks. To overcome this limitation, we introduce a novel Mixture-of-Experts (MoE)-based SemCom system. This system comprises a gating network and multiple experts, each specializing in different security challenges. The gating network adaptively selects suitable experts to counter heterogeneous attacks based on user-defined security requirements. Multiple experts collaborate to accomplish semantic communication tasks while meeting the security requirements of users. A case study in vehicular networks demonstrates the efficacy of the MoE-based SemCom system. Simulation results show that the proposed MoE-based SemCom system effectively mitigates concurrent heterogeneous attacks, with minimal impact on downstream task accuracy.
Submitted 23 September, 2024; originally announced September 2024.
Comments: 8 pages, 3 figures
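
A hedged sketch of the gating behaviour described above, assuming a standard softmax top-k router over expert subnetworks; module shapes and the routing rule are illustrative, not the paper's exact design.

```python
# Generic MoE routing sketch: a gate scores experts, the top-k process the input.
import torch
import torch.nn as nn

class MoEDefense(nn.Module):
    def __init__(self, dim: int = 128, n_experts: int = 4, top_k: int = 2):
        super().__init__()
        self.experts = nn.ModuleList(
            [nn.Sequential(nn.Linear(dim, dim), nn.ReLU(), nn.Linear(dim, dim))
             for _ in range(n_experts)]
        )
        self.gate = nn.Linear(dim, n_experts)
        self.top_k = top_k

    def forward(self, z: torch.Tensor) -> torch.Tensor:
        scores = self.gate(z).softmax(dim=-1)            # expert suitability
        weights, idx = scores.topk(self.top_k, dim=-1)   # pick top-k experts
        weights = weights / weights.sum(dim=-1, keepdim=True)
        out = torch.zeros_like(z)
        for slot in range(self.top_k):                   # weighted expert mix
            for b in range(z.size(0)):
                out[b] += weights[b, slot] * self.experts[int(idx[b, slot])](z[b])
        return out

z = torch.randn(8, 128)       # semantic features of received messages
print(MoEDefense()(z).shape)  # torch.Size([8, 128])
```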

arXiv:2409.14876 (https://arxiv.org/abs/2409.14876) [cs.CV, cs.AI]
Mammo-Clustering: A Weakly Supervised Multi-view Global-Local Context Clustering Network for Detection and Classification in Mammography
Authors: Shilong Yang, Chulong Zhang, Qi Zang, Juan Yu, Liang Zeng, Xiao Luo, Yexuan Xing, Xin Pan, Qi Li, Xiaokun Liang, Yaoqin Xie
Abstract: Breast cancer has long posed a significant threat to women's health, making early screening crucial for mitigating its impact. However, mammography, the preferred method for early screening, faces limitations such as the burden of double reading by radiologists, challenges in widespread adoption in remote and underdeveloped areas, and obstacles in intelligent early screening development due to data constraints. To address these challenges, we propose a weakly supervised multi-view mammography early screening model for breast cancer based on context clustering. Context clustering, a feature extraction structure that is neither CNN nor transformer, combined with multi-view learning for information complementation, presents a promising approach. The weak supervision design specifically addresses data limitations. Our model achieves state-of-the-art performance with fewer parameters on two public datasets, with an AUC of 0.828 on the Vindr-Mammo dataset and 0.805 on the CBIS-DDSM dataset. Our model shows potential in reducing the burden on doctors and increasing the feasibility of breast cancer screening for women in underdeveloped regions.
Submitted 23 September, 2024; originally announced September 2024.
Comments: 10 pages, 5 figures

arXiv:2409.13349 (https://arxiv.org/abs/2409.13349) [cs.CV, cs.AI]
ID-Guard: A Universal Framework for Combating Facial Manipulation via Breaking Identification
Authors: Zuomin Qu, Wei Lu, Xiangyang Luo, Qian Wang, Xiaochun Cao
Abstract: The misuse of deep learning-based facial manipulation poses a potential threat to civil rights. To prevent this fraud at its source, proactive defense technology was proposed to disrupt the manipulation process by adding invisible adversarial perturbations into images, making the forged output unconvincing to the observer. However, the non-directional disruption of the output may result in the retention of identity information of the person in the image, leading to stigmatization of the individual. In this paper, we propose a novel universal framework for combating facial manipulation, called ID-Guard. Specifically, this framework requires only a single forward pass of an encoder-decoder network to generate a cross-model universal adversarial perturbation corresponding to a specific facial image. To ensure anonymity in manipulated facial images, a novel Identity Destruction Module (IDM) is introduced to specifically destroy the identifiable information in forged faces. Additionally, we optimize the perturbations by treating the disruption of different facial manipulations as a multi-task learning problem and design a dynamic weights strategy to improve cross-model performance. The proposed framework reports impressive results in defending against multiple widely used facial manipulations, effectively distorting the identifiable regions in the manipulated facial images. In addition, our experiments reveal ID-Guard's ability to enable disrupted images to evade face inpainting and open-source image recognition systems.
Submitted 20 September, 2024; originally announced September 2024.
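
The dynamic-weights idea can be illustrated with a common multi-task heuristic, weighting each manipulation model in proportion to its current loss. This is an assumption for illustration, not the paper's exact strategy.

```python
# Illustrative dynamic task weighting (heuristic assumed, not ID-Guard's rule).
import torch

def dynamic_weights(task_losses: torch.Tensor) -> torch.Tensor:
    """Give harder (higher-loss) manipulation models more weight."""
    return task_losses.detach() / task_losses.detach().sum()

losses = torch.tensor([0.9, 0.3, 0.6])   # disruption loss per face editor
weights = dynamic_weights(losses)
total = (weights * losses).sum()         # objective for the shared perturbation
print(weights, total)
```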

arXiv:2409.12467 (https://arxiv.org/abs/2409.12467) [cs.CV, cs.AI, cs.LG]
SurgPLAN++: Universal Surgical Phase Localization Network for Online and Offline Inference
Authors: Zhen Chen, Xingjian Luo, Jinlin Wu, Long Bai, Zhen Lei, Hongliang Ren, Sebastien Ourselin, Hongbin Liu
Abstract: Surgical phase recognition is critical for assisting surgeons in understanding surgical videos. Existing studies focused more on online surgical phase recognition, leveraging preceding frames to predict the current frame. Despite great progress, they formulated the task as a series of frame-wise classifications, which results in a lack of global context over the entire procedure and incoherent predictions. Moreover, besides online analysis, accurate offline surgical phase recognition is also in significant clinical need for retrospective analysis, and existing online algorithms do not fully analyze the entire video, thereby limiting accuracy in offline analysis. To overcome these challenges and enhance both online and offline inference capabilities, we propose a universal Surgical Phase Localization Network, named SurgPLAN++, built on the principle of temporal detection. To ensure a global understanding of the surgical procedure, we devise a phase localization strategy for SurgPLAN++ to predict phase segments across the entire video through phase proposals. For online analysis, to generate high-quality phase proposals, SurgPLAN++ incorporates a data augmentation strategy to extend the streaming video into a pseudo-complete video through mirroring, center-duplication, and down-sampling. For offline analysis, SurgPLAN++ capitalizes on its global phase prediction framework to continuously refine preceding predictions during each online inference step, thereby significantly improving the accuracy of phase recognition. We perform extensive experiments to validate the effectiveness of our approach, and SurgPLAN++ achieves remarkable performance in both online and offline modes, outperforming state-of-the-art methods. The source code is available at https://github.com/lxj22/SurgPLAN-Plus.
Submitted 19 September, 2024; originally announced September 2024.
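
The three augmentation operations read naturally as simple sequence transforms. Below is a toy sketch under my own interpretation of mirroring, center-duplication, and down-sampling; the composition order and lengths are illustrative, not the paper's recipe.

```python
# Toy frame-sequence transforms extending a streaming prefix into a
# pseudo-complete sequence (interpretation assumed).

def mirror(frames: list) -> list:
    return frames + frames[::-1]                 # play the prefix backwards

def center_duplicate(frames: list) -> list:
    n = len(frames)
    center = frames[n // 4 : (3 * n) // 4]       # middle half, repeated once
    return frames[: n // 4] + center + center + frames[(3 * n) // 4 :]

def downsample(frames: list, stride: int = 2) -> list:
    return frames[::stride]                      # coarsen the timeline

streaming_prefix = list(range(12))               # stand-in frame indices
pseudo_complete = mirror(center_duplicate(streaming_prefix))
print(pseudo_complete)
```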

arXiv:2409.08946 (https://arxiv.org/abs/2409.08946) [cs.LG, cs.SI]
DELTA: Dual Consistency Delving with Topological Uncertainty for Active Graph Domain Adaptation
Authors: Pengyun Wang, Yadi Cao, Chris Russell, Siyu Heng, Junyu Luo, Yanxin Shen, Xiao Luo
Abstract: Graph domain adaptation has recently enabled knowledge transfer across different graphs. However, without semantic information on target graphs, the performance on target graphs is still far from satisfactory. To address the issue, we study the problem of active graph domain adaptation, which selects a small number of informative nodes on the target graph for extra annotation. This problem is highly challenging due to the complicated topological relationships and the distribution discrepancy across graphs. In this paper, we propose a novel approach named Dual Consistency Delving with Topological Uncertainty (DELTA) for active graph domain adaptation. Our DELTA consists of an edge-oriented graph subnetwork and a path-oriented graph subnetwork, which can explore topological semantics from complementary perspectives. In particular, the edge-oriented graph subnetwork utilizes the message passing mechanism to learn neighborhood information, while the path-oriented graph subnetwork explores high-order relationships from substructures. To jointly learn from the two subnetworks, we roughly select informative candidate nodes based on consistency across the two subnetworks. Then, we aggregate local semantics from each candidate node's K-hop subgraph, weighted by node degrees, for topological uncertainty estimation. To overcome potential distribution shifts, we compare target nodes with their corresponding source nodes to obtain discrepancy scores as an additional component for fine selection. Extensive experiments on benchmark datasets demonstrate that DELTA outperforms various state-of-the-art approaches.
Submitted 13 September, 2024; originally announced September 2024.
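
The degree-weighted K-hop aggregation can be pictured concretely. A toy sketch follows, assuming uncertainty is the degree-weighted average of predictive entropies over each candidate's K-hop subgraph; the graph, predictions, and K are invented.

```python
# Toy degree-weighted K-hop uncertainty estimation (formulation assumed).
import math

graph = {0: [1, 2], 1: [0, 2, 3], 2: [0, 1], 3: [1]}       # adjacency lists
probs = {0: [0.9, 0.1], 1: [0.6, 0.4], 2: [0.5, 0.5], 3: [0.8, 0.2]}

def entropy(p):
    return -sum(x * math.log(x + 1e-12) for x in p)

def k_hop(node, k):
    seen, frontier = {node}, {node}
    for _ in range(k):
        frontier = {m for n in frontier for m in graph[n]} - seen
        seen |= frontier
    return seen

def topological_uncertainty(node, k=2):
    hood = k_hop(node, k)
    total = sum(len(graph[n]) for n in hood)                # degree weights
    return sum(len(graph[n]) / total * entropy(probs[n]) for n in hood)

# Rank candidate nodes by uncertainty, most uncertain first.
print(sorted(graph, key=topological_uncertainty, reverse=True))
```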

arXiv:2409.04025 (https://arxiv.org/abs/2409.04025) [cs.CV, cs.AI]
BFA-YOLO: A balanced multiscale object detection network for building façade attachments detection
Authors: Yangguang Chen, Tong Wang, Guanzhou Chen, Kun Zhu, Xiaoliang Tan, Jiaqi Wang, Wenchao Guo, Qing Wang, Xiaolong Luo, Xiaodong Zhang
Abstract: The detection of façade elements on buildings, such as doors, windows, balconies, air conditioning units, billboards, and glass curtain walls, is a critical step in automating the creation of Building Information Modeling (BIM). Yet, this field faces significant challenges, including the uneven distribution of façade elements, the presence of small objects, and substantial background noise, which hamper detection accuracy. To address these issues, we develop the BFA-YOLO model and the BFA-3D dataset in this study. The BFA-YOLO model is an advanced architecture designed specifically for analyzing multi-view images of façade attachments. It integrates three novel components: the Feature Balanced Spindle Module (FBSM), which tackles the issue of uneven object distribution; the Target Dynamic Alignment Task Detection Head (TDATH), which enhances the detection of small objects; and the Position Memory Enhanced Self-Attention Mechanism (PMESA), aimed at reducing the impact of background noise. These elements collectively enable BFA-YOLO to effectively address each challenge, thereby improving model robustness and detection precision. The BFA-3D dataset offers multi-view images with precise annotations across a wide range of façade attachment categories. This dataset is developed to address the limitations of existing façade detection datasets, which often feature a single perspective and insufficient category coverage. Through comparative analysis, BFA-YOLO demonstrated improvements of 1.8% and 2.9% in mAP_50 on the BFA-3D dataset and the public Façade-WHU dataset, respectively, when compared to the baseline YOLOv8 model. These results highlight the superior performance of BFA-YOLO in façade element detection and the advancement of intelligent BIM technologies.
Submitted 11 November, 2024; v1 submitted 6 September, 2024; originally announced September 2024.
Comments: 21 pages

arXiv:2409.03455 (https://arxiv.org/abs/2409.03455) [cs.CV]
Data-free Distillation with Degradation-prompt Diffusion for Multi-weather Image Restoration
Authors: Pei Wang, Xiaotong Luo, Yuan Xie, Yanyun Qu
Abstract: Multi-weather image restoration has witnessed incredible progress, while increasing model capacity and expensive data acquisition impair its application on memory-limited devices. Data-free distillation provides an alternative that allows a lightweight student model to be learned from a pre-trained teacher model without relying on the original training data. Existing data-free learning methods mainly optimize the models with pseudo data generated by GANs or with real data collected from the Internet. However, they inevitably suffer from problems of unstable training or domain shifts relative to the original data. In this paper, we propose a novel Data-free Distillation with Degradation-prompt Diffusion framework for multi-weather Image Restoration (D4IR). It replaces GANs with pre-trained diffusion models to avoid model collapse and incorporates a degradation-aware prompt adapter to facilitate content-driven conditional diffusion for generating domain-related images. Specifically, a contrast-based degradation prompt adapter is first designed to capture degradation-aware prompts from web-collected degraded images. Then, the collected unpaired clean images are perturbed to latent features of stable diffusion and conditioned on the degradation-aware prompts to synthesize new domain-related degraded images for knowledge distillation. Experiments illustrate that our proposal achieves performance comparable to the model distilled with the original training data, and is even superior to other mainstream unsupervised methods.
Submitted 5 September, 2024; originally announced September 2024.

arXiv:2409.02724 (https://arxiv.org/abs/2409.02724) [cs.RO]
Surgical Task Automation Using Actor-Critic Frameworks and Self-Supervised Imitation Learning
Authors: Jingshuai Liu, Alain Andres, Yonghang Jiang, Xichun Luo, Wenmiao Shu, Sotirios A. Tsaftaris
Abstract: Surgical robot task automation has recently attracted great attention due to its potential to benefit both surgeons and patients. Reinforcement learning (RL) based approaches have demonstrated promising ability to provide solutions to automated surgical manipulations on various tasks. To address the exploration challenge, expert demonstrations can be utilized to enhance learning efficiency via imitation learning (IL) approaches. However, the success of such methods normally relies on both states and action labels. Unfortunately, action labels can be hard to capture, or their manual annotation is prohibitively expensive owing to the requirement for expert knowledge. It therefore remains an appealing and open problem to leverage expert demonstrations composed of pure states in RL. In this work, we present an actor-critic RL framework, termed AC-SSIL, to overcome this challenge of learning with state-only demonstrations collected by following an unknown expert policy. It adopts a self-supervised IL method, dubbed SSIL, to effectively incorporate demonstrated states into RL paradigms by retrieving from demonstrations the nearest neighbours of the query state and utilizing the bootstrapping of actor networks. We showcase through experiments on an open-source surgical simulation platform that our method delivers remarkable improvements over the RL baseline and exhibits comparable performance against action-based IL methods, which implies the efficacy and potential of our method for expert demonstration-guided learning scenarios.
Submitted 11 September, 2024; v1 submitted 4 September, 2024; originally announced September 2024.
Comments: 8 pages, 7 figures, 62 conferences
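
The retrieval step lends itself to a small sketch: given a query state, fetch its nearest demonstrated states. The Euclidean metric, the value of k, and the averaged target below are my assumptions; the paper instead bootstraps the actor network from the retrieved states.

```python
# Nearest-neighbour retrieval over state-only demonstrations (details assumed).
import numpy as np

demo_states = np.random.rand(500, 10)      # stand-in expert demonstration states

def nearest_demo_states(query: np.ndarray, k: int = 5) -> np.ndarray:
    dists = np.linalg.norm(demo_states - query, axis=1)
    return demo_states[np.argsort(dists)[:k]]

query = np.random.rand(10)                 # state visited by the learner
neighbours = nearest_demo_states(query)
# One simple use: an auxiliary imitation signal pulling the learner's states
# toward demonstrated states.
imitation_target = neighbours.mean(axis=0)
print(np.linalg.norm(query - imitation_target))
```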
id="2408.14520v3-abstract-short" style="display: inline;"> Large-scale &#34;pre-train and prompt learning&#34; paradigms have demonstrated remarkable adaptability, enabling broad applications across diverse domains such as question answering, image recognition, and multimodal retrieval. This approach fully leverages the potential of large-scale pre-trained models, reducing downstream data requirements and computational costs while enhancing model applicability ac&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.14520v3-abstract-full').style.display = 'inline'; document.getElementById('2408.14520v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.14520v3-abstract-full" style="display: none;"> Large-scale &#34;pre-train and prompt learning&#34; paradigms have demonstrated remarkable adaptability, enabling broad applications across diverse domains such as question answering, image recognition, and multimodal retrieval. This approach fully leverages the potential of large-scale pre-trained models, reducing downstream data requirements and computational costs while enhancing model applicability across various tasks. Graphs, as versatile data structures that capture relationships between entities, play pivotal roles in fields such as social network analysis, recommender systems, and biological graphs. Despite the success of pre-train and prompt learning paradigms in Natural Language Processing (NLP) and Computer Vision (CV), their application in graph domains remains nascent. In graph-structured data, not only do the node and edge features often have disparate distributions, but the topological structures also differ significantly. This diversity in graph data can lead to incompatible patterns or gaps between pre-training and fine-tuning on downstream graphs. We aim to bridge this gap by summarizing methods for alleviating these disparities. This includes exploring prompt design methodologies, comparing related techniques, assessing application scenarios and datasets, and identifying unresolved problems and challenges. This survey categorizes over 100 relevant works in this field, summarizing general design principles and the latest applications, including text-attributed graphs, molecules, proteins, and recommendation systems. Through this extensive review, we provide a foundational understanding of graph prompt learning, aiming to impact not only the graph mining community but also the broader Artificial General Intelligence (AGI) community. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.14520v3-abstract-full').style.display = 'none'; document.getElementById('2408.14520v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">I have decided to temporarily withdraw this draft as I am in the process of making further revisions to improve its content</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.12185">arXiv:2408.12185</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.12185">pdf</a>, <a href="https://arxiv.org/format/2408.12185">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.24963/ijcai.2024/520">10.24963/ijcai.2024/520 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Rank and Align: Towards Effective Source-free Graph Domain Adaptation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Luo%2C+J">Junyu Luo</a>, <a href="/search/cs?searchtype=author&amp;query=Xiao%2C+Z">Zhiping Xiao</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yifan Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Luo%2C+X">Xiao Luo</a>, <a href="/search/cs?searchtype=author&amp;query=Yuan%2C+J">Jingyang Yuan</a>, <a href="/search/cs?searchtype=author&amp;query=Ju%2C+W">Wei Ju</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+L">Langechuan Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+M">Ming Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.12185v1-abstract-short" style="display: inline;"> Graph neural networks (GNNs) have achieved impressive performance in graph domain adaptation. However, extensive source graphs could be unavailable in real-world scenarios due to privacy and storage concerns. To this end, we investigate an underexplored yet practical problem of source-free graph domain adaptation, which transfers knowledge from source models instead of source graphs to a target do&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.12185v1-abstract-full').style.display = 'inline'; document.getElementById('2408.12185v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.12185v1-abstract-full" style="display: none;"> Graph neural networks (GNNs) have achieved impressive performance in graph domain adaptation. However, extensive source graphs could be unavailable in real-world scenarios due to privacy and storage concerns. To this end, we investigate an underexplored yet practical problem of source-free graph domain adaptation, which transfers knowledge from source models instead of source graphs to a target domain. 

arXiv:2408.12185 (https://arxiv.org/abs/2408.12185) [cs.LG, cs.AI, cs.IR]
DOI: 10.24963/ijcai.2024/520
Rank and Align: Towards Effective Source-free Graph Domain Adaptation
Authors: Junyu Luo, Zhiping Xiao, Yifan Wang, Xiao Luo, Jingyang Yuan, Wei Ju, Langechuan Liu, Ming Zhang
Abstract: Graph neural networks (GNNs) have achieved impressive performance in graph domain adaptation. However, extensive source graphs could be unavailable in real-world scenarios due to privacy and storage concerns. To this end, we investigate an underexplored yet practical problem of source-free graph domain adaptation, which transfers knowledge from source models instead of source graphs to a target domain. To solve this problem, we introduce a novel GNN-based approach called Rank and Align (RNA), which ranks graph similarities with spectral seriation for robust semantics learning, and aligns inharmonic graphs with harmonic graphs that are close to the source domain for subgraph extraction. In particular, to overcome label scarcity, we employ the spectral seriation algorithm to infer robust pairwise rankings, which can guide semantic learning using a similarity learning objective. To depict distribution shifts, we utilize spectral clustering and the silhouette coefficient to detect harmonic graphs, which the source model can easily classify. To reduce potential domain discrepancy, we extract domain-invariant subgraphs from inharmonic graphs by an adversarial edge sampling process, which guides the invariant learning of GNNs. Extensive experiments on several benchmark datasets demonstrate the effectiveness of our proposed RNA.
Submitted 22 August, 2024; originally announced August 2024.
Comments: Published in IJCAI 2024
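
The harmonic-graph detection step can be approximated with off-the-shelf tools: spectral clustering over graph-level embeddings plus per-sample silhouette scores, thresholded to split harmonic from inharmonic graphs. The embeddings and the threshold below are stand-ins, not RNA's actual values.

```python
# Silhouette-based harmonic/inharmonic split over graph embeddings (illustrative).
import numpy as np
from sklearn.cluster import SpectralClustering
from sklearn.metrics import silhouette_samples

embeddings = np.random.rand(40, 16)      # one embedding per target graph
labels = SpectralClustering(n_clusters=3, random_state=0).fit_predict(embeddings)
scores = silhouette_samples(embeddings, labels)

harmonic = np.where(scores > 0.1)[0]     # confidently clustered graphs
inharmonic = np.where(scores <= 0.1)[0]  # candidates for subgraph extraction
print(len(harmonic), len(inharmonic))
```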
has-text-grey-dark mathjax" id="2408.12171v1-abstract-short" style="display: inline;"> This paper explores the recent advancements in enhancing Computational Fluid Dynamics (CFD) tasks through Machine Learning (ML) techniques. We begin by introducing fundamental concepts, traditional methods, and benchmark datasets, then examine the various roles ML plays in improving CFD. The literature systematically reviews papers in recent five years and introduces a novel classification for for&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.12171v1-abstract-full').style.display = 'inline'; document.getElementById('2408.12171v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.12171v1-abstract-full" style="display: none;"> This paper explores the recent advancements in enhancing Computational Fluid Dynamics (CFD) tasks through Machine Learning (ML) techniques. We begin by introducing fundamental concepts, traditional methods, and benchmark datasets, then examine the various roles ML plays in improving CFD. The literature systematically reviews papers in recent five years and introduces a novel classification for forward modeling: Data-driven Surrogates, Physics-Informed Surrogates, and ML-assisted Numerical Solutions. Furthermore, we also review the latest ML methods in inverse design and control, offering a novel classification and providing an in-depth discussion. Then we highlight real-world applications of ML for CFD in critical scientific and engineering disciplines, including aerodynamics, combustion, atmosphere &amp; ocean science, biology fluid, plasma, symbolic regression, and reduced order modeling. Besides, we identify key challenges and advocate for future research directions to address these challenges, such as multi-scale representation, physical knowledge encoding, scientific foundation model and automatic scientific discovery. This review serves as a guide for the rapidly expanding ML for CFD community, aiming to inspire insights for future advancements. We draw the conclusion that ML is poised to significantly transform CFD research by enhancing simulation accuracy, reducing computational time, and enabling more complex analyses of fluid dynamics. The paper resources can be viewed at https://github.com/WillDreamer/Awesome-AI4CFD. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.12171v1-abstract-full').style.display = 'none'; document.getElementById('2408.12171v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">22 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.09943">arXiv:2408.09943</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.09943">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Calibrating Noise for Group Privacy in Subsampled Mechanisms </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+Y">Yangfan Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Luo%2C+X">Xinjian Luo</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+Y">Yin Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Xiao%2C+X">Xiaokui Xiao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.09943v2-abstract-short" style="display: inline;"> Given a group size m and a sensitive dataset D, group privacy (GP) releases information about D with the guarantee that the adversary cannot infer with high confidence whether the underlying data is D or a neighboring dataset D&#39; that differs from D by m records. GP generalizes the well-established notion of differential privacy (DP) for protecting individuals&#39; privacy; in particular, when m=1, GP&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.09943v2-abstract-full').style.display = 'inline'; document.getElementById('2408.09943v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.09943v2-abstract-full" style="display: none;"> Given a group size m and a sensitive dataset D, group privacy (GP) releases information about D with the guarantee that the adversary cannot infer with high confidence whether the underlying data is D or a neighboring dataset D&#39; that differs from D by m records. GP generalizes the well-established notion of differential privacy (DP) for protecting individuals&#39; privacy; in particular, when m=1, GP reduces to DP. Compared to DP, GP is capable of protecting the sensitive aggregate information of a group of up to m individuals, e.g., the average annual income among members of a yacht club. Despite its longstanding presence in the research literature and its promising applications, GP is often treated as an afterthought, with most approaches first developing a DP mechanism and then using a generic conversion to adapt it for GP, treating the DP solution as a black box. As we point out in the paper, this methodology is suboptimal when the underlying DP solution involves subsampling, e.g., in the classic DP-SGD method for training deep learning models. In this case, the DP-to-GP conversion is overly pessimistic in its analysis, leading to low utility in the published results under GP. Motivated by this, we propose a novel analysis framework that provides tight privacy accounting for subsampled GP mechanisms. 

arXiv:2408.09943 (https://arxiv.org/abs/2408.09943) [cs.CR]
Calibrating Noise for Group Privacy in Subsampled Mechanisms
Authors: Yangfan Jiang, Xinjian Luo, Yin Yang, Xiaokui Xiao
Abstract: Given a group size m and a sensitive dataset D, group privacy (GP) releases information about D with the guarantee that the adversary cannot infer with high confidence whether the underlying data is D or a neighboring dataset D' that differs from D by m records. GP generalizes the well-established notion of differential privacy (DP) for protecting individuals' privacy; in particular, when m=1, GP reduces to DP. Compared to DP, GP is capable of protecting the sensitive aggregate information of a group of up to m individuals, e.g., the average annual income among members of a yacht club. Despite its longstanding presence in the research literature and its promising applications, GP is often treated as an afterthought, with most approaches first developing a DP mechanism and then using a generic conversion to adapt it for GP, treating the DP solution as a black box. As we point out in the paper, this methodology is suboptimal when the underlying DP solution involves subsampling, e.g., in the classic DP-SGD method for training deep learning models. In this case, the DP-to-GP conversion is overly pessimistic in its analysis, leading to low utility in the published results under GP. Motivated by this, we propose a novel analysis framework that provides tight privacy accounting for subsampled GP mechanisms. Instead of converting a black-box DP mechanism to GP, our solution carefully analyzes and utilizes the inherent randomness in subsampled mechanisms, leading to a substantially improved bound on the privacy loss with respect to GP. The proposed solution applies to a wide variety of foundational mechanisms with subsampling. Extensive experiments with real datasets demonstrate that compared to the baseline convert-from-blackbox-DP approach, our GP mechanisms achieve noise reductions of over an order of magnitude in several practical settings, including deep neural network training.
Submitted 24 August, 2024; v1 submitted 19 August, 2024; originally announced August 2024.
Comments: accepted for publication in Proceedings of VLDB Endowment (PVLDB) 2025
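
The baseline being improved on is easy to state concretely: for a pure-DP Laplace mechanism, the generic group-privacy conversion spends epsilon/m per record, so the noise scale grows linearly in the group size m. The sketch below shows only this naive conversion; the paper's tighter accounting for subsampled mechanisms is not reproduced here.

```python
# Naive group-privacy noise calibration for the Laplace mechanism.
# A scale of b = sensitivity/epsilon gives epsilon-DP; a group of m records
# costs m*epsilon, so hitting a target epsilon for GP needs an m-fold scale.

def laplace_scale(sensitivity: float, epsilon: float, m: int = 1) -> float:
    """Noise scale b for m-group privacy via the naive eps -> eps/m conversion."""
    return sensitivity * m / epsilon

for m in (1, 4, 16):
    print(m, laplace_scale(sensitivity=1.0, epsilon=1.0, m=m))
```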
arXiv:2408.09411 (https://arxiv.org/abs/2408.09411) [cs.CV]
DOI: 10.59275/j.melba.2024-489g
Title: Weakly Supervised Lymph Nodes Segmentation Based on Partial Instance Annotations with Pre-trained Dual-branch Network and Pseudo Label Learning
Authors: Litingyu Wang, Yijie Qu, Xiangde Luo, Wenjun Liao, Shichuan Zhang, Guotai Wang
Abstract: Assessing the presence of potentially malignant lymph nodes aids in estimating cancer progression, and identifying surrounding benign lymph nodes can assist in determining potential metastatic pathways for cancer. For quantitative analysis, automatic segmentation of lymph nodes is crucial. However, due to the labor-intensive and time-consuming manual annotation process required for a large number of lymph nodes, it is more practical to annotate only a subset of the lymph node instances to reduce annotation costs. In this study, we propose a pre-trained Dual-Branch network with Dynamically Mixed Pseudo label (DBDMP) to learn from partial instance annotations for lymph node segmentation. To obtain reliable pseudo labels for lymph nodes that are not annotated, we employ a dual-decoder network to generate different outputs that are then dynamically mixed. We integrate the original weak partial annotations with the mixed pseudo labels to supervise the network. To further leverage the extensive amount of unannotated voxels, we apply a self-supervised pre-training strategy to enhance the model's feature extraction capability. Experiments on the mediastinal Lymph Node Quantification (LNQ) dataset demonstrate that our method, compared to directly learning from partial instance annotations, significantly improves the Dice Similarity Coefficient (DSC) from 11.04% to 54.10% and reduces the Average Symmetric Surface Distance (ASSD) from 20.83 mm to 8.72 mm. The code is available at https://github.com/WltyBY/LNQ2023_training_code.git
Submitted: 18 August, 2024; originally announced August 2024.
Comments: Accepted for publication at the Journal of Machine Learning for Biomedical Imaging (MELBA) https://melba-journal.org/2024:013
Journal ref: Machine Learning for Biomedical Imaging 2 (2024)
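The abstract's core mechanism, dynamically mixing two decoder outputs into pseudo labels for unannotated voxels, can be sketched as follows. The per-voxel random mixing rule and the hard-label choice are our assumptions, not necessarily the authors' exact design.

```python
# Hedged sketch of "dynamically mixed pseudo labels": two decoder heads emit
# probability maps, mixed voxel-wise with random weights; annotated voxels
# keep their weak partial labels, the rest get the mixed pseudo label.
import torch

def dynamically_mixed_pseudo_label(prob_a, prob_b, partial_label, annotated_mask):
    """prob_a, prob_b: (N, C, H, W) softmax outputs of the two decoders.
    partial_label: (N, H, W) integer labels, valid where annotated_mask is True."""
    alpha = torch.rand_like(prob_a)              # per-voxel mixing weights (assumption)
    mixed = alpha * prob_a + (1 - alpha) * prob_b
    pseudo = mixed.argmax(dim=1)                 # hard pseudo labels
    return torch.where(annotated_mask, partial_label, pseudo)

# Toy usage with 2 classes on a 4x4 slice.
p = torch.softmax(torch.randn(1, 2, 4, 4), dim=1)
q = torch.softmax(torch.randn(1, 2, 4, 4), dim=1)
lbl = torch.zeros(1, 4, 4, dtype=torch.long)
mask = torch.zeros(1, 4, 4, dtype=torch.bool)
print(dynamically_mixed_pseudo_label(p, q, lbl, mask))
```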
arXiv:2408.09380 (https://arxiv.org/abs/2408.09380) [cs.AI, cs.IR]
Title: ELASTIC: Efficient Linear Attention for Sequential Interest Compression
Authors: Jiaxin Deng, Shiyao Wang, Song Lu, Yinfeng Li, Xinchen Luo, Yuanjun Liu, Peixing Xu, Guorui Zhou
Abstract: State-of-the-art sequential recommendation models rely heavily on the transformer's attention mechanism. However, the quadratic computational and memory complexity of self-attention has limited its scalability for modeling users' long-range behaviour sequences. To address this problem, we propose ELASTIC, an Efficient Linear Attention for SequenTial Interest Compression, requiring only linear time complexity and decoupling model capacity from computational cost. Specifically, ELASTIC introduces a fixed-length set of interest experts with a linear dispatcher attention mechanism, which compresses long-term behaviour sequences into a significantly more compact representation, reducing GPU memory usage by up to 90% with a 2.7x inference speed-up. The proposed linear dispatcher attention mechanism significantly reduces the quadratic complexity and makes the model feasible for adequately modeling extremely long sequences. Moreover, to retain the capacity for modeling various user interests, ELASTIC initializes a vast learnable interest memory bank and sparsely retrieves compressed user interests from the memory with negligible computational overhead. The proposed interest memory retrieval technique significantly expands the cardinality of the available interest space while keeping the same computational cost, thereby striking a balance between recommendation accuracy and efficiency. To validate the effectiveness of ELASTIC, we conduct extensive experiments on various public datasets and compare it with several strong sequential recommenders. Experimental results demonstrate that ELASTIC consistently outperforms baselines by a significant margin and also highlight its computational efficiency when modeling long sequences. We will make our implementation code publicly available.
Submitted: 5 November, 2024; v1 submitted 18 August, 2024; originally announced August 2024.
Comments: We hereby withdraw this paper from arXiv due to incomplete experiments. Upon further review, we have determined that additional experimental work is necessary to fully validate our findings and conclusions
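The compression step the abstract describes, attending from a fixed number of learnable "interest expert" slots over an arbitrarily long behaviour sequence, costs O(kL) rather than the O(L^2) of full self-attention. Below is a minimal sketch under assumed dimensions; the dispatcher details and the memory-bank retrieval are not reproduced.

```python
# Hedged sketch: cross-attention from k learnable slots compresses a length-L
# sequence into k vectors in O(k*L) time (not the paper's exact mechanism).
import torch
import torch.nn as nn

class InterestCompressor(nn.Module):
    def __init__(self, dim=64, num_experts=16):
        super().__init__()
        self.experts = nn.Parameter(torch.randn(num_experts, dim))  # fixed-length slots
        self.to_kv = nn.Linear(dim, 2 * dim)

    def forward(self, seq):                      # seq: (batch, L, dim)
        k, v = self.to_kv(seq).chunk(2, dim=-1)
        scores = self.experts @ k.transpose(1, 2) / k.shape[-1] ** 0.5
        attn = torch.softmax(scores, dim=-1)     # (batch, num_experts, L)
        return attn @ v                          # (batch, num_experts, dim)

compressed = InterestCompressor()(torch.randn(2, 10_000, 64))
print(compressed.shape)  # torch.Size([2, 16, 64]): L no longer appears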
arXiv:2408.08696 (https://arxiv.org/abs/2408.08696) [cs.CL, cs.LG]
Title: Turning Trash into Treasure: Accelerating Inference of Large Language Models with Token Recycling
Authors: Xianzhen Luo, Yixuan Wang, Qingfu Zhu, Zhiming Zhang, Xuanyu Zhang, Qing Yang, Dongliang Xu, Wanxiang Che
Abstract: The rapid growth in the parameters of large language models (LLMs) has made inference latency a fundamental bottleneck, limiting broader application of LLMs. Speculative decoding represents a lossless approach to accelerate inference through a guess-and-verify paradigm, leveraging the parallel capabilities of modern hardware. Some speculative decoding methods rely on additional structures to guess draft tokens, such as small models or parameter-efficient architectures, which need extra training before use. Alternatively, retrieval-based train-free techniques build libraries from pre-existing corpora or by n-gram generation. However, they face challenges like large storage requirements, time-consuming retrieval, and limited adaptability. Observing that candidate tokens generated during the decoding process are likely to reoccur in future sequences, we propose Token Recycling. This approach stores candidate tokens in an adjacency matrix and employs a breadth-first search (BFS)-like algorithm on the matrix to construct a draft tree. The tree is then validated through tree attention. New candidate tokens from the decoding process are then used to update the matrix. Token Recycling requires less than 2 MB of additional storage and achieves approximately 2x speedup across all sizes of LLMs. It significantly outperforms existing train-free methods by 30% and even a training method by 25%. It can be directly applied to any existing LLMs and tasks without the need for adaptation.
Submitted: 16 August, 2024; originally announced August 2024.
Comments: under review
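The abstract describes the data structure concretely enough to sketch: an adjacency store mapping each token to its most recently seen candidate successors, plus a BFS that grows a draft tree from it. The candidate budget K, the tree policy, and the dict-based store below are assumptions standing in for the paper's matrix.

```python
# Hedged sketch of Token Recycling's draft-tree construction: BFS over an
# adjacency store of recent candidate successors (sizes/policies assumed).
from collections import deque

K = 4  # candidates retained per token (assumption)

def build_draft_tree(adj, root_token, max_depth=3):
    """Return draft nodes as (parent_index, token) pairs; node 0 is the root.
    adj: dict mapping token id -> list of up to K likely next token ids."""
    nodes = [(-1, root_token)]
    queue = deque([(0, 0)])                      # (node_index, depth)
    while queue:
        idx, depth = queue.popleft()
        if depth == max_depth:
            continue
        for nxt in adj.get(nodes[idx][1], [])[:K]:
            nodes.append((idx, nxt))
            queue.append((len(nodes) - 1, depth + 1))
    return nodes                                  # verified later via tree attention

def update_adjacency(adj, token, candidates):
    # New candidates from each decoding step overwrite the stored entries.
    adj[token] = list(candidates)[:K]

adj = {1: [5, 9], 5: [7], 9: [5]}
print(build_draft_tree(adj, root_token=1, max_depth=2))
```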
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">under review</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.08338">arXiv:2408.08338</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.08338">pdf</a>, <a href="https://arxiv.org/ps/2408.08338">ps</a>, <a href="https://arxiv.org/format/2408.08338">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Activation Space Selectable Kolmogorov-Arnold Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yang%2C+Z">Zhuoqin Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+J">Jiansong Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Luo%2C+X">Xiaoling Luo</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+Z">Zheng Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Shen%2C+L">Linlin Shen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.08338v1-abstract-short" style="display: inline;"> The multilayer perceptron (MLP), a fundamental paradigm in current artificial intelligence, is widely applied in fields such as computer vision and natural language processing. However, the recently proposed Kolmogorov-Arnold Network (KAN), based on nonlinear additive connections, has been proven to achieve performance comparable to MLPs with significantly fewer parameters. Despite this potential,&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.08338v1-abstract-full').style.display = 'inline'; document.getElementById('2408.08338v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.08338v1-abstract-full" style="display: none;"> The multilayer perceptron (MLP), a fundamental paradigm in current artificial intelligence, is widely applied in fields such as computer vision and natural language processing. However, the recently proposed Kolmogorov-Arnold Network (KAN), based on nonlinear additive connections, has been proven to achieve performance comparable to MLPs with significantly fewer parameters. Despite this potential, the use of a single activation function space results in reduced performance of KAN and related works across different tasks. To address this issue, we propose an activation space Selectable KAN (S-KAN). S-KAN employs an adaptive strategy to choose the possible activation mode for data at each feedforward KAN node. Our approach outperforms baseline methods in seven representative function fitting tasks and significantly surpasses MLP methods with the same level of parameters. Furthermore, we extend the structure of S-KAN and propose an activation space selectable Convolutional KAN (S-ConvKAN), which achieves leading results on four general image classification datasets. Our method mitigates the performance variability of the original KAN across different tasks and demonstrates through extensive experiments that feedforward KANs with selectable activations can achieve or even exceed the performance of MLP-based methods. 
arXiv:2408.08188 (https://arxiv.org/abs/2408.08188) [cs.RO, cs.AI, cs.LO]
Title: Scaling Up Natural Language Understanding for Multi-Robots Through the Lens of Hierarchy
Authors: Shaojun Xu, Xusheng Luo, Yutong Huang, Letian Leng, Ruixuan Liu, Changliu Liu
Abstract: Long-horizon planning is hindered by challenges such as uncertainty accumulation, computational complexity, delayed rewards and incomplete information. This work proposes an approach to exploit the task hierarchy in human instructions to facilitate multi-robot planning. Using Large Language Models (LLMs), we propose a two-step approach to translate multi-sentence instructions into a structured language, Hierarchical Linear Temporal Logic (LTL), which serves as a formal representation for planning. Initially, LLMs transform the instructions into a hierarchical representation defined as a Hierarchical Task Tree, capturing the logical and temporal relations among tasks. Following this, a domain-specific fine-tuning of the LLM translates the sub-tasks of each task into flat LTL formulas, aggregating them to form hierarchical LTL specifications. These specifications are then leveraged for planning using off-the-shelf planners. Our framework not only bridges the gap between instructions and algorithmic planning but also showcases the potential of LLMs in harnessing hierarchical reasoning to automate multi-robot task planning. Through evaluations in both simulation and real-world experiments involving human participants, we demonstrate that our method can handle more complex instructions compared to existing methods. The results indicate that our approach achieves higher success rates and lower costs in multi-robot task allocation and plan generation. Demo videos are available at https://youtu.be/7WOrDKxIMIs
Submitted: 15 August, 2024; originally announced August 2024.
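To make "hierarchical LTL" concrete, here is an illustrative specification of our own, not taken from the paper: a top-level formula orders two tasks, and each task is itself a flat LTL formula over robot-level atomic propositions.

```latex
% Illustrative hierarchical LTL spec (assumption, not the paper's example).
% $\Diamond$ = eventually, $\Box$ = always; task symbols $\phi_i$ act as
% atoms at the top level and expand into flat LTL one level down.
\begin{align*}
\varphi_{\text{top}} &= \Diamond\,(\phi_1 \wedge \Diamond\,\phi_2)
  && \text{finish task 1, then task 2}\\
\phi_1 &= \Diamond\,(\mathsf{pick}_{r_1} \wedge \Diamond\,\mathsf{place}_{r_1})
  && \text{robot 1 picks, then places}\\
\phi_2 &= \Diamond\,\mathsf{inspect}_{r_2} \wedge \Box\,\lnot\,\mathsf{collision}
  && \text{robot 2 inspects, safely}
\end{align*}
```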
arXiv:2408.08083 (https://arxiv.org/abs/2408.08083) [cs.HC, cs.AI, q-bio.NC]
Title: Confidence-weighted integration of human and machine judgments for superior decision-making
Authors: Felipe Yáñez, Xiaoliang Luo, Omar Valerio Minero, Bradley C. Love
Abstract: Large language models (LLMs) have emerged as powerful tools in various domains. Recent studies have shown that LLMs can surpass humans in certain tasks, such as predicting the outcomes of neuroscience studies. What role does this leave for humans in the overall decision process? One possibility is that humans, despite performing worse than LLMs, can still add value when teamed with them. A human and machine team can surpass each individual teammate when team members' confidence is well-calibrated and team members diverge in which tasks they find difficult (i.e., calibration and diversity are needed). We simplified and extended a Bayesian approach to combining judgments using a logistic regression framework that integrates confidence-weighted judgments for any number of team members. Using this straightforward method, we demonstrated in a neuroscience forecasting task that, even when humans were inferior to LLMs, their combination with one or more LLMs consistently improved team performance. Our hope is that this simple and effective strategy for integrating the judgments of humans and machines will lead to productive collaborations.
Submitted: 15 August, 2024; originally announced August 2024.
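The combination method is stated plainly enough to sketch: treat each team member's signed confidence as a feature and fit a logistic regression, whose weights calibrate how much each member's judgment counts. The data below is synthetic and the exact feature encoding is our assumption.

```python
# Hedged sketch of confidence-weighted judgment integration via logistic
# regression (synthetic data; encoding is an assumption, not the paper's).
import numpy as np
from sklearn.linear_model import LogisticRegression

rng = np.random.default_rng(0)
n = 500
truth = rng.integers(0, 2, n)                    # binary ground truth

def member(accuracy):
    """Signed confidence in [-1, 1]: sign encodes the vote, magnitude the confidence."""
    vote = np.where(rng.random(n) < accuracy, truth, 1 - truth)
    return (2 * vote - 1) * rng.random(n)

X = np.column_stack([member(0.85), member(0.70)])  # e.g., one LLM and one human
clf = LogisticRegression().fit(X, truth)
print(clf.coef_)            # larger weight for the better-calibrated member
print(clf.score(X, truth))  # team accuracy of the combined judgment
```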
arXiv:2408.06761 (https://arxiv.org/abs/2408.06761) [cs.CV, cs.AI]
Title: Cross-View Geolocalization and Disaster Mapping with Street-View and VHR Satellite Imagery: A Case Study of Hurricane IAN
Authors: Hao Li, Fabian Deuser, Wenping Yina, Xuanshu Luo, Paul Walther, Gengchen Mai, Wei Huang, Martin Werner
Abstract: Natural disasters play a key role in shaping human-urban infrastructure interactions. Effective and efficient response to natural disasters is essential for building resilience and a sustainable urban environment. Two types of information are usually the most necessary and the most difficult to gather in disaster response. The first is disaster damage perception, which shows how badly people think urban infrastructure has been damaged. The second is geolocation awareness, i.e., how people's whereabouts are made available. In this paper, we propose a novel disaster mapping framework, namely CVDisaster, aiming at simultaneously addressing geolocalization and damage perception estimation using cross-view Street-View Imagery (SVI) and Very High-Resolution satellite imagery. CVDisaster consists of two cross-view models, where CVDisaster-Geoloc is a cross-view geolocalization model based on a contrastive learning objective with a Siamese ConvNeXt image encoder, and CVDisaster-Est is a cross-view classification model based on a Couple Global Context Vision Transformer (CGCViT). Taking Hurricane IAN as a case study, we evaluate the CVDisaster framework by creating a novel cross-view dataset (CVIAN) and conducting extensive experiments. As a result, we show that CVDisaster can achieve highly competitive performance (over 80% for geolocalization and 75% for damage perception estimation) with even limited fine-tuning efforts, which largely motivates future cross-view models and applications within the broader GeoAI research community. The data and code are publicly available at: https://github.com/tum-bgd/CVDisaster
Submitted: 13 August, 2024; originally announced August 2024.
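The geolocalization branch is described as contrastive learning over a Siamese encoder; a standard InfoNCE objective over matched street-view/satellite embedding pairs captures the idea. The encoders are stubbed out below, and the loss form is our assumption rather than the paper's exact objective.

```python
# Hedged sketch of a cross-view contrastive objective (standard InfoNCE;
# the paper's Siamese ConvNeXt encoders are replaced by random embeddings).
import torch
import torch.nn.functional as F

def infonce(street_emb, sat_emb, temperature=0.07):
    """Row i of each batch is a matched street-view / satellite pair."""
    street = F.normalize(street_emb, dim=-1)
    sat = F.normalize(sat_emb, dim=-1)
    logits = street @ sat.T / temperature       # pairwise cosine similarities
    targets = torch.arange(len(logits))         # positives lie on the diagonal
    return F.cross_entropy(logits, targets)

print(infonce(torch.randn(8, 128), torch.randn(8, 128)))
```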
arXiv:2408.06636 (https://arxiv.org/abs/2408.06636) [cs.CV]
Title: Unified-IoU: For High-Quality Object Detection
Authors: Xiangjie Luo, Zhihao Cai, Bo Shao, Yingxun Wang
Abstract: Object detection is an important part of the field of computer vision, and the effect of object detection is directly determined by the regression accuracy of the prediction box. As the key to model training, IoU (Intersection over Union) quantifies the difference between the current prediction box and the Ground Truth box. Subsequent researchers have continuously added further considerations to IoU, such as center distance, aspect ratio, and so on. However, there is an upper limit to merely refining the geometric differences; moreover, there is a potential connection between a new consideration index and IoU itself, and direct addition or subtraction between the two may lead to the problem of "over-consideration". Based on this, we propose a new IoU loss function, called Unified-IoU (UIoU), which is more concerned with the weight assignment between prediction boxes of different quality. Specifically, the loss function dynamically shifts the model's attention from low-quality prediction boxes to high-quality prediction boxes in a novel way to enhance the model's detection performance on high-precision or dense datasets and to achieve a balance in training speed. Our proposed method achieves better performance on multiple datasets, especially at a high IoU threshold, where UIoU has a more significant improvement effect compared with other improved IoU losses. Our code is publicly available at: https://github.com/lxj-drifter/UIOU_files
Submitted: 13 August, 2024; originally announced August 2024.
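For reference, below is the plain IoU term that UIoU and the cited variants build on; UIoU's quality-dependent re-weighting itself is not reproduced here.

```python
# Plain IoU between axis-aligned boxes (the base quantity behind IoU losses).
import torch

def box_iou(a, b):
    """a, b: (N, 4) boxes as (x1, y1, x2, y2); returns element-wise IoU."""
    lt = torch.maximum(a[:, :2], b[:, :2])       # top-left of intersection
    rb = torch.minimum(a[:, 2:], b[:, 2:])       # bottom-right of intersection
    inter = (rb - lt).clamp(min=0).prod(dim=1)
    area_a = (a[:, 2:] - a[:, :2]).prod(dim=1)
    area_b = (b[:, 2:] - b[:, :2]).prod(dim=1)
    return inter / (area_a + area_b - inter + 1e-7)

print(box_iou(torch.tensor([[0., 0., 2., 2.]]),
              torch.tensor([[1., 1., 3., 3.]])))  # ~0.1429 (1 / 7)
```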
arXiv:2408.06633 (https://arxiv.org/abs/2408.06633) [cs.CV]
Title: A lightweight YOLOv5-FFM model for occlusion pedestrian detection
Authors: Xiangjie Luo, Bo Shao, Zhihao Cai, Yingxun Wang
Abstract: The development of autonomous driving technology is inseparable from pedestrian detection. Because of the high speed of the vehicle, the accuracy and real-time performance of the pedestrian detection algorithm are very important. YOLO, as an efficient and simple one-stage target detection method, is often used for pedestrian detection in various environments. However, this series of detectors faces some challenges, such as excessive computation and an undesirable detection rate when facing occluded pedestrians. In this paper, we propose an improved lightweight YOLOv5 model to deal with these problems. This model can achieve better pedestrian detection accuracy with fewer floating-point operations (FLOPs), especially for occluded targets. In order to achieve the above goals, we made improvements based on the YOLOv5 model framework and introduced the Ghost module and SE block. Furthermore, we designed a local feature fusion module (FFM) to deal with occlusion in pedestrian detection. To verify the validity of our method, two datasets, CityPersons and CUHK Occlusion, were selected for the experiment. The experimental results show that, compared with the original YOLOv5s model, the average precision (AP) of our method is significantly improved, while the number of parameters is reduced by 27.9% and FLOPs are reduced by 19.0%.
Submitted: 13 August, 2024; originally announced August 2024.
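Of the components named in the abstract, the SE block is a standard building block; a conventional squeeze-and-excitation sketch, not the authors' code, looks like this:

```python
# Standard SE (squeeze-and-excitation) block: channel-wise attention that
# rescales feature maps; shown for context, not as the paper's implementation.
import torch
import torch.nn as nn

class SEBlock(nn.Module):
    def __init__(self, channels, reduction=16):
        super().__init__()
        self.fc = nn.Sequential(
            nn.Linear(channels, channels // reduction), nn.ReLU(inplace=True),
            nn.Linear(channels // reduction, channels), nn.Sigmoid(),
        )

    def forward(self, x):                        # x: (N, C, H, W)
        s = x.mean(dim=(2, 3))                   # squeeze: global average pool
        return x * self.fc(s)[:, :, None, None]  # excite: per-channel rescale

print(SEBlock(32)(torch.randn(1, 32, 8, 8)).shape)  # torch.Size([1, 32, 8, 8])
```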
arXiv:2408.05502 (https://arxiv.org/abs/2408.05502) [cs.CV, cs.CL]
Title: GEM: Context-Aware Gaze EstiMation with Visual Search Behavior Matching for Chest Radiograph
Authors: Shaonan Liu, Wenting Chen, Jie Liu, Xiaoling Luo, Linlin Shen
Abstract: Gaze estimation is pivotal in human scene comprehension tasks, particularly in medical diagnostic analysis. Eye-tracking technology facilitates the recording of physicians' ocular movements during image interpretation, thereby elucidating their visual attention patterns and information-processing strategies. In this paper, we initially define the context-aware gaze estimation problem in medical radiology report settings. To understand the attention allocation and cognitive behavior of radiologists during the medical image interpretation process, we propose a context-aware Gaze EstiMation (GEM) network that utilizes eye gaze data collected from radiologists to simulate their visual search behavior patterns throughout the image interpretation process. It consists of a context-awareness module, visual behavior graph construction, and visual behavior matching. Within the context-awareness module, we achieve intricate multimodal registration by establishing connections between medical reports and images. Subsequently, for a more accurate simulation of genuine visual search behavior patterns, we introduce a visual behavior graph structure, capturing such behavior through high-order relationships (edges) between gaze points (nodes). To maintain the authenticity of visual behavior, we devise a visual behavior-matching approach, adjusting the high-order relationships between them by matching the graph constructed from real and estimated gaze points. Extensive experiments on four publicly available datasets demonstrate the superiority of GEM over existing methods and its strong generalizability, which also provides a new direction for the effective utilization of diverse modalities in medical image interpretation and enhances the interpretability of models in the field of medical imaging. https://github.com/Tiger-SN/GEM
Submitted: 10 August, 2024; originally announced August 2024.
Comments: 9 figures
arXiv:2408.05341 (https://arxiv.org/abs/2408.05341) [cs.CV, cs.AI]
Title: CAR: Contrast-Agnostic Deformable Medical Image Registration with Contrast-Invariant Latent Regularization
Authors: Yinsong Wang, Siyi Du, Shaoming Zheng, Xinzhe Luo, Chen Qin
Abstract: Multi-contrast image registration is a challenging task due to the complex intensity relationships between different imaging contrasts. Conventional image registration methods are typically based on iterative optimizations for each input image pair, which is time-consuming and sensitive to contrast variations. While learning-based approaches are much faster during the inference stage, due to generalizability issues, they typically can only be applied to the fixed contrasts observed during the training stage. In this work, we propose a novel contrast-agnostic deformable image registration framework that can generalize to images of arbitrary contrast, without observing them during training. In particular, we propose a random convolution-based contrast augmentation scheme, which simulates arbitrary contrasts over a single image contrast while preserving the inherent structural information. To ensure that the network can learn contrast-invariant representations for facilitating contrast-agnostic registration, we further introduce contrast-invariant latent regularization (CLR), which regularizes the representation in latent space through a contrast invariance loss. Experiments show that CAR outperforms the baseline approaches regarding registration accuracy and also possesses better generalization ability to unseen imaging contrasts. Code is available at https://github.com/Yinsong0510/CAR
Submitted: 3 August, 2024; originally announced August 2024.
Comments: 12 pages, 3 figures, 3 tables, accepted by WBIR 2024
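The contrast-augmentation idea is simple enough to sketch: convolving an image with a freshly sampled random kernel remaps its intensity/contrast profile while largely preserving structure. The kernel size and renormalization below are assumptions.

```python
# Hedged sketch of random-convolution contrast augmentation (details assumed).
import torch
import torch.nn.functional as F

def random_conv_augment(img, kernel_size=3):
    """img: (N, 1, H, W) single-contrast image -> simulated new contrast."""
    weight = torch.randn(1, 1, kernel_size, kernel_size)   # fresh random kernel
    out = F.conv2d(img, weight, padding=kernel_size // 2)
    return (out - out.mean()) / (out.std() + 1e-6)         # renormalize intensities

print(random_conv_augment(torch.randn(2, 1, 64, 64)).shape)  # same spatial shape
```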
arXiv:2408.05124 (https://arxiv.org/abs/2408.05124) [cs.CR, cs.CV]
Title: Modeling Electromagnetic Signal Injection Attacks on Camera-based Smart Systems: Applications and Mitigation
Authors: Youqian Zhang, Michael Cheung, Chunxi Yang, Xinwei Zhai, Zitong Shen, Xinyu Ji, Eugene Y. Fu, Sze-Yiu Chau, Xiapu Luo
Abstract: Numerous safety- or security-critical systems depend on cameras to perceive their surroundings, further allowing artificial intelligence (AI) to analyze the captured images to make important decisions. However, a concerning attack vector has emerged, namely, electromagnetic waves, which pose a threat to the integrity of these systems. Such attacks enable attackers to manipulate the images remotely, leading to incorrect AI decisions, e.g., autonomous vehicles failing to detect obstacles ahead, resulting in collisions. The lack of understanding regarding how different systems react to such attacks poses a significant security risk. Furthermore, no effective solutions have been demonstrated to mitigate this threat. To address these gaps, we modeled the attacks and developed a simulation method for generating adversarial images. Through rigorous analysis, we confirmed that the effects of the simulated adversarial images are indistinguishable from those of real attacks. This method enables researchers and engineers to rapidly assess the susceptibility of various AI vision applications to these attacks, without the need to construct complicated attack devices. In our experiments, most of the models demonstrated vulnerabilities to these attacks, emphasizing the need to enhance their robustness. Fortunately, our modeling and simulation method serves as a stepping stone toward developing more resilient models. We present a pilot study on adversarial training to improve their robustness against attacks, and our results demonstrate a significant improvement by recovering up to 91% performance, offering a promising direction for mitigating this threat.
Submitted: 9 August, 2024; originally announced August 2024.
Comments: 13 pages, 10 figures, 4 tables
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 10 figures, 4 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.04813">arXiv:2408.04813</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.04813">pdf</a>, <a href="https://arxiv.org/format/2408.04813">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Rethinking Multiple Instance Learning: Developing an Instance-Level Classifier via Weakly-Supervised Self-Training </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ma%2C+Y">Yingfan Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Luo%2C+X">Xiaoyuan Luo</a>, <a href="/search/cs?searchtype=author&amp;query=Yuan%2C+M">Mingzhi Yuan</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+X">Xinrong Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+M">Manning Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.04813v1-abstract-short" style="display: inline;"> Multiple instance learning (MIL) problem is currently solved from either bag-classification or instance-classification perspective, both of which ignore important information contained in some instances and result in limited performance. For example, existing methods often face difficulty in learning hard positive instances. In this paper, we formulate MIL as a semi-supervised instance classificat&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.04813v1-abstract-full').style.display = 'inline'; document.getElementById('2408.04813v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.04813v1-abstract-full" style="display: none;"> Multiple instance learning (MIL) problem is currently solved from either bag-classification or instance-classification perspective, both of which ignore important information contained in some instances and result in limited performance. For example, existing methods often face difficulty in learning hard positive instances. In this paper, we formulate MIL as a semi-supervised instance classification problem, so that all the labeled and unlabeled instances can be fully utilized to train a better classifier. The difficulty in this formulation is that all the labeled instances are negative in MIL, and traditional self-training techniques used in semi-supervised learning tend to degenerate in generating pseudo labels for the unlabeled instances in this scenario. To resolve this problem, we propose a weakly-supervised self-training method, in which we utilize the positive bag labels to construct a global constraint and a local constraint on the pseudo labels to prevent them from degenerating and force the classifier to learn hard positive instances. It is worth noting that easy positive instances are instances are far from the decision boundary in the classification process, while hard positive instances are those close to the decision boundary. 
Through iterative optimization, the pseudo labels can gradually approach the true labels. Extensive experiments on two MNIST synthetic datasets, five traditional MIL benchmark datasets and two histopathology whole slide image datasets show that our method achieved new SOTA performance on all of them. The code will be publicly available. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.04813v1-abstract-full').style.display = 'none'; document.getElementById('2408.04813v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.04194">arXiv:2408.04194</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.04194">pdf</a>, <a href="https://arxiv.org/format/2408.04194">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> FDI: Attack Neural Code Generation Systems through User Feedback Channel </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Sun%2C+Z">Zhensu Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Du%2C+X">Xiaoning Du</a>, <a href="/search/cs?searchtype=author&amp;query=Luo%2C+X">Xiapu Luo</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+F">Fu Song</a>, <a href="/search/cs?searchtype=author&amp;query=Lo%2C+D">David Lo</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+L">Li Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.04194v1-abstract-short" style="display: inline;"> Neural code generation systems have recently attracted increasing attention to improve developer productivity and speed up software development. Typically, these systems maintain a pre-trained neural model and make it available to general users as a service (e.g., through remote APIs) and incorporate a feedback mechanism to extensively collect and utilize the users&#39; reaction to the generated code,&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.04194v1-abstract-full').style.display = 'inline'; document.getElementById('2408.04194v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.04194v1-abstract-full" style="display: none;"> Neural code generation systems have recently attracted increasing attention to improve developer productivity and speed up software development. Typically, these systems maintain a pre-trained neural model and make it available to general users as a service (e.g., through remote APIs) and incorporate a feedback mechanism to extensively collect and utilize the users&#39; reaction to the generated code, i.e., user feedback. However, the security implications of such feedback have not yet been explored. 
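One way to read the "global constraint" in this abstract: self-training must not drive every instance to the negative class, so each positive bag is forced to keep at least one positive pseudo label. The top-1 rule below is our stand-in for the paper's actual global/local constraints.

```python
# Hedged sketch of constrained pseudo labeling for MIL self-training
# (the top-1-per-positive-bag rule is an assumption, not the paper's rule).
import torch

def constrained_pseudo_labels(scores, bag_ids, bag_is_positive, threshold=0.5):
    """scores: (N,) instance scores in [0, 1]; bag_ids: (N,) bag index per
    instance; bag_is_positive: dict mapping bag index -> bool bag label."""
    pseudo = (scores > threshold).float()
    for b in bag_ids.unique().tolist():
        mask = bag_ids == b
        if bag_is_positive[b] and pseudo[mask].sum() == 0:
            # Global constraint: a positive bag must keep >= 1 positive instance.
            top = torch.nonzero(mask).squeeze(1)[scores[mask].argmax()]
            pseudo[top] = 1.0
    return pseudo

scores = torch.tensor([0.2, 0.4, 0.1, 0.9])
print(constrained_pseudo_labels(scores, torch.tensor([0, 0, 1, 1]),
                                {0: True, 1: True}))  # tensor([0., 1., 0., 1.])
```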
arXiv:2408.04194  https://arxiv.org/abs/2408.04194  [pdf, other]
Subjects: cs.SE (Software Engineering); cs.CR (Cryptography and Security)
Title: FDI: Attack Neural Code Generation Systems through User Feedback Channel
Authors: Zhensu Sun, Xiaoning Du, Xiapu Luo, Fu Song, David Lo, Li Li
Abstract: Neural code generation systems have recently attracted increasing attention for their potential to improve developer productivity and speed up software development. Typically, these systems maintain a pre-trained neural model, make it available to general users as a service (e.g., through remote APIs), and incorporate a feedback mechanism to extensively collect and utilize the users' reactions to the generated code, i.e., user feedback. However, the security implications of such feedback have not yet been explored. Through a systematic study of current feedback mechanisms, we find that feedback makes these systems vulnerable to feedback data injection (FDI) attacks. We discuss the methodology of FDI attacks and present a pre-attack profiling strategy for inferring the attack constraints of a targeted system in the black-box setting. We demonstrate two proof-of-concept examples that use the FDI attack surface to mount prompt injection attacks and backdoor attacks on practical neural code generation systems. An attacker can thereby stealthily manipulate a neural code generation system into generating code containing vulnerabilities, attack payloads, or malicious and spam messages. Our findings reveal the security implications of feedback mechanisms in neural code generation systems and pave the way for improving their security.
Submitted: 7 August, 2024; originally announced August 2024.
Comments: Accepted by ISSTA'24
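To make the attack surface concrete, here is a toy, entirely hypothetical simulation of the vulnerability class the abstract describes: a service that trusts user feedback verbatim and replays it to later users. The class and method names are invented for illustration, the "payload" is a harmless marker, and this is not the paper's artifact; real systems differ in how feedback is incorporated (e.g., retraining on it rather than retrieving it):

```python
# Hypothetical toy model of a feedback-data-injection surface: the "service"
# stores user-submitted fixes and replays the top-rated one for a matching
# prompt, so whoever controls the feedback channel controls future outputs.
from collections import defaultdict

class CompletionService:
    """Toy code-completion service that reuses top-rated user feedback."""
    def __init__(self):
        self.feedback = defaultdict(list)  # prompt -> [(rating, snippet)]

    def submit_feedback(self, prompt, snippet, rating):
        # The flaw: feedback is trusted verbatim (no review or sanitization).
        self.feedback[prompt].append((rating, snippet))

    def complete(self, prompt):
        # Replay the top-rated user "fix" if one exists for this prompt.
        if self.feedback[prompt]:
            return max(self.feedback[prompt])[1]
        return "pass  # model-generated placeholder"

svc = CompletionService()
# An attacker floods the feedback channel with a highly rated "correction";
# the marker comment stands in for real injected code.
svc.submit_feedback("read config", "do_something_unexpected()  # INJECTED", rating=5)
print(svc.complete("read config"))  # a later user now receives the injected snippet
```

The mitigation direction the paper's findings point toward follows directly from the flaw in the sketch: feedback must be validated, attributed, and rate-limited before it can influence what the system generates for other users.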
