Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/>  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b">  <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\$","\$"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a>  <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div>  <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div>  <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 204 results for author: <span class="mathjax">Yin, M</span> </h1> </div> <div class="level-right is-hidden-mobile">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Yin%2C+M">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Yin, M"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Yin%2C+M&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Yin, M"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Yin%2C+M&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Yin%2C+M&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Yin%2C+M&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Yin%2C+M&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Yin%2C+M&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Yin%2C+M&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.16961">arXiv:2411.16961</a> <span> [<a href="https://arxiv.org/pdf/2411.16961">pdf</a>, <a href="https://arxiv.org/format/2411.16961">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Glo-In-One-v2: Holistic Identification of Glomerular Cells, Tissues, and Lesions in Human and Mouse Histopathology </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yu%2C+L">Lining Yu</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Mengmeng Yin</a>, <a href="/search/cs?searchtype=author&query=Deng%2C+R">Ruining Deng</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Q">Quan Liu</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+T">Tianyuan Yao</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+C">Can Cui</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+J">Junlin Guo</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yu Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yaohong Wang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+S">Shilin Zhao</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+H">Haichun Yang</a>, <a href="/search/cs?searchtype=author&query=Huo%2C+Y">Yuankai Huo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.16961v1-abstract-short" style="display: inline;"> Segmenting glomerular intraglomerular tissue and lesions traditionally depends on detailed morphological evaluations by expert nephropathologists, a labor-intensive process susceptible to interobserver variability. Our group previously developed the Glo-In-One toolkit for integrated detection and segmentation of glomeruli. In this study, we leverage the Glo-In-One toolkit to version 2 with fine-gr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.16961v1-abstract-full').style.display = 'inline'; document.getElementById('2411.16961v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.16961v1-abstract-full" style="display: none;"> Segmenting glomerular intraglomerular tissue and lesions traditionally depends on detailed morphological evaluations by expert nephropathologists, a labor-intensive process susceptible to interobserver variability. Our group previously developed the Glo-In-One toolkit for integrated detection and segmentation of glomeruli. In this study, we leverage the Glo-In-One toolkit to version 2 with fine-grained segmentation capabilities, curating 14 distinct labels for tissue regions, cells, and lesions across a dataset of 23,529 annotated glomeruli across human and mouse histopathology data. To our knowledge, this dataset is among the largest of its kind to date.In this study, we present a single dynamic head deep learning architecture designed to segment 14 classes within partially labeled images of human and mouse pathology data. Our model was trained using a training set derived from 368 annotated kidney whole-slide images (WSIs) to identify 5 key intraglomerular tissues covering Bowman's capsule, glomerular tuft, mesangium, mesangial cells, and podocytes. Additionally, the network segments 9 glomerular lesion classes including adhesion, capsular drop, global sclerosis, hyalinosis, mesangial lysis, microaneurysm, nodular sclerosis, mesangial expansion, and segmental sclerosis. The glomerulus segmentation model achieved a decent performance compared with baselines, and achieved a 76.5 % average Dice Similarity Coefficient (DSC). Additional, transfer learning from rodent to human for glomerular lesion segmentation model has enhanced the average segmentation accuracy across different types of lesions by more than 3 %, as measured by Dice scores. The Glo-In-One-v2 model and trained weight have been made publicly available at https: //github.com/hrlblab/Glo-In-One_v2. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.16961v1-abstract-full').style.display = 'none'; document.getElementById('2411.16961v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.16746">arXiv:2411.16746</a> <span> [<a href="https://arxiv.org/pdf/2411.16746">pdf</a>, <a href="https://arxiv.org/format/2411.16746">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> LoBAM: LoRA-Based Backdoor Attack on Model Merging </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yin%2C+M">Ming Yin</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jingyang Zhang</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+J">Jingwei Sun</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+M">Minghong Fang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+H">Hai Li</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yiran Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.16746v1-abstract-short" style="display: inline;"> Model merging is an emerging technique that integrates multiple models fine-tuned on different tasks to create a versatile model that excels in multiple domains. This scheme, in the meantime, may open up backdoor attack opportunities where one single malicious model can jeopardize the integrity of the merged model. Existing works try to demonstrate the risk of such attacks by assuming substantial… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.16746v1-abstract-full').style.display = 'inline'; document.getElementById('2411.16746v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.16746v1-abstract-full" style="display: none;"> Model merging is an emerging technique that integrates multiple models fine-tuned on different tasks to create a versatile model that excels in multiple domains. This scheme, in the meantime, may open up backdoor attack opportunities where one single malicious model can jeopardize the integrity of the merged model. Existing works try to demonstrate the risk of such attacks by assuming substantial computational resources, focusing on cases where the attacker can fully fine-tune the pre-trained model. Such an assumption, however, may not be feasible given the increasing size of machine learning models. In practice where resources are limited and the attacker can only employ techniques like Low-Rank Adaptation (LoRA) to produce the malicious model, it remains unclear whether the attack can still work and pose threats. In this work, we first identify that the attack efficacy is significantly diminished when using LoRA for fine-tuning. Then, we propose LoBAM, a method that yields high attack success rate with minimal training resources. The key idea of LoBAM is to amplify the malicious weights in an intelligent way that effectively enhances the attack efficacy. We demonstrate that our design can lead to improved attack success rate through both theoretical proof and extensive empirical experiments across various model merging scenarios. Moreover, we show that our method has strong stealthiness and is difficult to detect. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.16746v1-abstract-full').style.display = 'none'; document.getElementById('2411.16746v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.15215">arXiv:2411.15215</a> <span> [<a href="https://arxiv.org/pdf/2411.15215">pdf</a>, <a href="https://arxiv.org/format/2411.15215">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> S$^2$ALM: Sequence-Structure Pre-trained Large Language Model for Comprehensive Antibody Representation Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yin%2C+M">Mingze Yin</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+H">Hanjing Zhou</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+J">Jialu Wu</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Y">Yiheng Zhu</a>, <a href="/search/cs?searchtype=author&query=Zhan%2C+Y">Yuxuan Zhan</a>, <a href="/search/cs?searchtype=author&query=Kong%2C+Z">Zitai Kong</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+H">Hongxia Xu</a>, <a href="/search/cs?searchtype=author&query=Hsieh%2C+C">Chang-Yu Hsieh</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jintai Chen</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+T">Tingjun Hou</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+J">Jian Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.15215v1-abstract-short" style="display: inline;"> Antibodies safeguard our health through their precise and potent binding to specific antigens, demonstrating promising therapeutic efficacy in the treatment of numerous diseases, including COVID-19. Recent advancements in biomedical language models have shown the great potential to interpret complex biological structures and functions. However, existing antibody specific models have a notable limi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.15215v1-abstract-full').style.display = 'inline'; document.getElementById('2411.15215v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.15215v1-abstract-full" style="display: none;"> Antibodies safeguard our health through their precise and potent binding to specific antigens, demonstrating promising therapeutic efficacy in the treatment of numerous diseases, including COVID-19. Recent advancements in biomedical language models have shown the great potential to interpret complex biological structures and functions. However, existing antibody specific models have a notable limitation that they lack explicit consideration for antibody structural information, despite the fact that both 1D sequence and 3D structure carry unique and complementary insights into antibody behavior and functionality. This paper proposes Sequence-Structure multi-level pre-trained Antibody Language Model (S$^2$ALM), combining holistic sequential and structural information in one unified, generic antibody foundation model. We construct a hierarchical pre-training paradigm incorporated with two customized multi-level training objectives to facilitate the modeling of comprehensive antibody representations. S$^2$ALM's representation space uncovers inherent functional binding mechanisms, biological evolution properties and structural interaction patterns. Pre-trained over 75 million sequences and 11.7 million structures, S$^2$ALM can be adopted for diverse downstream tasks: accurately predicting antigen-antibody binding affinities, precisely distinguishing B cell maturation stages, identifying antibody crucial binding positions, and specifically designing novel coronavirus-binding antibodies. Remarkably, S$^2$ALM outperforms well-established and renowned baselines and sets new state-of-the-art performance across extensive antibody specific understanding and generation tasks. S$^2$ALM's ability to model comprehensive and generalized representations further positions its potential to advance real-world therapeutic antibody development, potentially addressing unmet academic, industrial, and clinical needs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.15215v1-abstract-full').style.display = 'none'; document.getElementById('2411.15215v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.12950">arXiv:2411.12950</a> <span>  </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> KAAE: Numerical Reasoning for Knowledge Graphs via Knowledge-aware Attributes Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yin%2C+M">Ming Yin</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Q">Qiang Zhou</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+Z">Zongsheng Cao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+M">Mei Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.12950v2-abstract-short" style="display: inline;"> Numerical reasoning is pivotal in various artificial intelligence applications, such as natural language processing and recommender systems, where it involves using entities, relations, and attribute values (e.g., weight, length) to infer new factual relations (e.g., the Nile is longer than the Amazon). However, existing approaches encounter two critical challenges in modeling: (1) semantic releva… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12950v2-abstract-full').style.display = 'inline'; document.getElementById('2411.12950v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.12950v2-abstract-full" style="display: none;"> Numerical reasoning is pivotal in various artificial intelligence applications, such as natural language processing and recommender systems, where it involves using entities, relations, and attribute values (e.g., weight, length) to infer new factual relations (e.g., the Nile is longer than the Amazon). However, existing approaches encounter two critical challenges in modeling: (1) semantic relevance-the challenge of insufficiently capturing the necessary contextual interactions among entities, relations, and numerical attributes, often resulting in suboptimal inference; and (2) semantic ambiguity-the difficulty in accurately distinguishing ordinal relationships during numerical reasoning, which compromises the generation of high-quality samples and limits the effectiveness of contrastive learning. To address these challenges, we propose the novel Knowledge-Aware Attributes Embedding model (KAAE) for knowledge graph embeddings in numerical reasoning. Specifically, to overcome the challenge of semantic relevance, we introduce a Mixture-of-Experts-Knowledge-Aware (MoEKA) Encoder, designed to integrate the semantics of entities, relations, and numerical attributes into a joint semantic space. To tackle semantic ambiguity, we implement a new ordinal knowledge contrastive learning (OKCL) strategy that generates high-quality ordinal samples from the original data with the aid of ordinal relations, capturing fine-grained semantic nuances essential for accurate numerical reasoning. Experiments on three public benchmark datasets demonstrate the superior performance of KAAE across various attribute value distributions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12950v2-abstract-full').style.display = 'none'; document.getElementById('2411.12950v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper was decided to be withdrawn due to failure to resolve collaborative disputes within the research team or authorship issues. We are actively communicating to reach an agreement and avoid a recurrence of similar issues</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.12593">arXiv:2411.12593</a> <span> [<a href="https://arxiv.org/pdf/2411.12593">pdf</a>, <a href="https://arxiv.org/format/2411.12593">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> AdaCM$^2$: On Understanding Extremely Long-Term Video with Adaptive Cross-Modality Memory Reduction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Man%2C+Y">Yuanbin Man</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Ying Huang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+C">Chengming Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+B">Bingzhe Li</a>, <a href="/search/cs?searchtype=author&query=Niu%2C+W">Wei Niu</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Miao Yin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.12593v1-abstract-short" style="display: inline;"> The advancements in large language models (LLMs) have propelled the improvement of video understanding tasks by incorporating LLMs with visual models. However, most existing LLM-based models (e.g., VideoLLaMA, VideoChat) are constrained to processing short-duration videos. Recent attempts to understand long-term videos by extracting and compressing visual features into a fixed memory size. Neverth… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12593v1-abstract-full').style.display = 'inline'; document.getElementById('2411.12593v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.12593v1-abstract-full" style="display: none;"> The advancements in large language models (LLMs) have propelled the improvement of video understanding tasks by incorporating LLMs with visual models. However, most existing LLM-based models (e.g., VideoLLaMA, VideoChat) are constrained to processing short-duration videos. Recent attempts to understand long-term videos by extracting and compressing visual features into a fixed memory size. Nevertheless, those methods leverage only visual modality to merge video tokens and overlook the correlation between visual and textual queries, leading to difficulties in effectively handling complex question-answering tasks. To address the challenges of long videos and complex prompts, we propose AdaCM$^2$, which, for the first time, introduces an adaptive cross-modality memory reduction approach to video-text alignment in an auto-regressive manner on video streams. Our extensive experiments on various video understanding tasks, such as video captioning, video question answering, and video classification, demonstrate that AdaCM$^2$ achieves state-of-the-art performance across multiple datasets while significantly reducing memory usage. Notably, it achieves a 4.5% improvement across multiple tasks in the LVU dataset with a GPU memory consumption reduction of up to 65%. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12593v1-abstract-full').style.display = 'none'; document.getElementById('2411.12593v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10461">arXiv:2411.10461</a> <span> [<a href="https://arxiv.org/pdf/2411.10461">pdf</a>, <a href="https://arxiv.org/format/2411.10461">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Utilizing Human Behavior Modeling to Manipulate Explanations in AI-Assisted Decision Making: The Good, the Bad, and the Scary </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhuoyan Li</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Ming Yin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.10461v1-abstract-short" style="display: inline;"> Recent advances in AI models have increased the integration of AI-based decision aids into the human decision making process. To fully unlock the potential of AI-assisted decision making, researchers have computationally modeled how humans incorporate AI recommendations into their final decisions, and utilized these models to improve human-AI team performance. Meanwhile, due to the ``black-box'' n… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10461v1-abstract-full').style.display = 'inline'; document.getElementById('2411.10461v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.10461v1-abstract-full" style="display: none;"> Recent advances in AI models have increased the integration of AI-based decision aids into the human decision making process. To fully unlock the potential of AI-assisted decision making, researchers have computationally modeled how humans incorporate AI recommendations into their final decisions, and utilized these models to improve human-AI team performance. Meanwhile, due to the ``black-box'' nature of AI models, providing AI explanations to human decision makers to help them rely on AI recommendations more appropriately has become a common practice. In this paper, we explore whether we can quantitatively model how humans integrate both AI recommendations and explanations into their decision process, and whether this quantitative understanding of human behavior from the learned model can be utilized to manipulate AI explanations, thereby nudging individuals towards making targeted decisions. Our extensive human experiments across various tasks demonstrate that human behavior can be easily influenced by these manipulated explanations towards targeted outcomes, regardless of the intent being adversarial or benign. Furthermore, individuals often fail to detect any anomalies in these explanations, despite their decisions being affected by them. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10461v1-abstract-full').style.display = 'none'; document.getElementById('2411.10461v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06019">arXiv:2411.06019</a> <span> [<a href="https://arxiv.org/pdf/2411.06019">pdf</a>, <a href="https://arxiv.org/format/2411.06019">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> </div> </div> <p class="title is-5 mathjax"> GaussianSpa: An "Optimizing-Sparsifying" Simplification Framework for Compact and High-Quality 3D Gaussian Splatting </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yangming Zhang</a>, <a href="/search/cs?searchtype=author&query=Jia%2C+W">Wenqi Jia</a>, <a href="/search/cs?searchtype=author&query=Niu%2C+W">Wei Niu</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Miao Yin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06019v1-abstract-short" style="display: inline;"> 3D Gaussian Splatting (3DGS) has emerged as a mainstream for novel view synthesis, leveraging continuous aggregations of Gaussian functions to model scene geometry. However, 3DGS suffers from substantial memory requirements to store the multitude of Gaussians, hindering its practicality. To address this challenge, we introduce GaussianSpa, an optimization-based simplification framework for compact… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06019v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06019v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06019v1-abstract-full" style="display: none;"> 3D Gaussian Splatting (3DGS) has emerged as a mainstream for novel view synthesis, leveraging continuous aggregations of Gaussian functions to model scene geometry. However, 3DGS suffers from substantial memory requirements to store the multitude of Gaussians, hindering its practicality. To address this challenge, we introduce GaussianSpa, an optimization-based simplification framework for compact and high-quality 3DGS. Specifically, we formulate the simplification as an optimization problem associated with the 3DGS training. Correspondingly, we propose an efficient "optimizing-sparsifying" solution that alternately solves two independent sub-problems, gradually imposing strong sparsity onto the Gaussians in the training process. Our comprehensive evaluations on various datasets show the superiority of GaussianSpa over existing state-of-the-art approaches. Notably, GaussianSpa achieves an average PSNR improvement of 0.9 dB on the real-world Deep Blending dataset with 10$\times$ fewer Gaussians compared to the vanilla 3DGS. Our project page is available at https://gaussianspa.github.io/. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06019v1-abstract-full').style.display = 'none'; document.getElementById('2411.06019v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project page at https://gaussianspa.github.io/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.04138">arXiv:2411.04138</a> <span> [<a href="https://arxiv.org/pdf/2411.04138">pdf</a>, <a href="https://arxiv.org/format/2411.04138">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> NetworkGym: Reinforcement Learning Environments for Multi-Access Traffic Management in Network Simulation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Haider%2C+M">Momin Haider</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Ming Yin</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+M">Menglei Zhang</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+A">Arpit Gupta</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+J">Jing Zhu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yu-Xiang Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.04138v1-abstract-short" style="display: inline;"> Mobile devices such as smartphones, laptops, and tablets can often connect to multiple access networks (e.g., Wi-Fi, LTE, and 5G) simultaneously. Recent advancements facilitate seamless integration of these connections below the transport layer, enhancing the experience for apps that lack inherent multi-path support. This optimization hinges on dynamically determining the traffic distribution acro… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04138v1-abstract-full').style.display = 'inline'; document.getElementById('2411.04138v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.04138v1-abstract-full" style="display: none;"> Mobile devices such as smartphones, laptops, and tablets can often connect to multiple access networks (e.g., Wi-Fi, LTE, and 5G) simultaneously. Recent advancements facilitate seamless integration of these connections below the transport layer, enhancing the experience for apps that lack inherent multi-path support. This optimization hinges on dynamically determining the traffic distribution across networks for each device, a process referred to as \textit{multi-access traffic splitting}. This paper introduces \textit{NetworkGym}, a high-fidelity network environment simulator that facilitates generating multiple network traffic flows and multi-access traffic splitting. This simulator facilitates training and evaluating different RL-based solutions for the multi-access traffic splitting problem. Our initial explorations demonstrate that the majority of existing state-of-the-art offline RL algorithms (e.g. CQL) fail to outperform certain hand-crafted heuristic policies on average. This illustrates the urgent need to evaluate offline RL algorithms against a broader range of benchmarks, rather than relying solely on popular ones such as D4RL. We also propose an extension to the TD3+BC algorithm, named Pessimistic TD3 (PTD3), and demonstrate that it outperforms many state-of-the-art offline RL algorithms. PTD3's behavioral constraint mechanism, which relies on value-function pessimism, is theoretically motivated and relatively simple to implement. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04138v1-abstract-full').style.display = 'none'; document.getElementById('2411.04138v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS (Datasets and Benchmarks)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.02120">arXiv:2411.02120</a> <span> [<a href="https://arxiv.org/pdf/2411.02120">pdf</a>, <a href="https://arxiv.org/format/2411.02120">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> Bridge-IF: Learning Inverse Protein Folding with Markov Bridges </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhu%2C+Y">Yiheng Zhu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+J">Jialu Wu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Q">Qiuyi Li</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+J">Jiahuan Yan</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Mingze Yin</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+W">Wei Wu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+M">Mingyang Li</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+J">Jieping Ye</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zheng Wang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+J">Jian Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.02120v1-abstract-short" style="display: inline;"> Inverse protein folding is a fundamental task in computational protein design, which aims to design protein sequences that fold into the desired backbone structures. While the development of machine learning algorithms for this task has seen significant success, the prevailing approaches, which predominantly employ a discriminative formulation, frequently encounter the error accumulation issue and… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02120v1-abstract-full').style.display = 'inline'; document.getElementById('2411.02120v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.02120v1-abstract-full" style="display: none;"> Inverse protein folding is a fundamental task in computational protein design, which aims to design protein sequences that fold into the desired backbone structures. While the development of machine learning algorithms for this task has seen significant success, the prevailing approaches, which predominantly employ a discriminative formulation, frequently encounter the error accumulation issue and often fail to capture the extensive variety of plausible sequences. To fill these gaps, we propose Bridge-IF, a generative diffusion bridge model for inverse folding, which is designed to learn the probabilistic dependency between the distributions of backbone structures and protein sequences. Specifically, we harness an expressive structure encoder to propose a discrete, informative prior derived from structures, and establish a Markov bridge to connect this prior with native sequences. During the inference stage, Bridge-IF progressively refines the prior sequence, culminating in a more plausible design. Moreover, we introduce a reparameterization perspective on Markov bridge models, from which we derive a simplified loss function that facilitates more effective training. We also modulate protein language models (PLMs) with structural conditions to precisely approximate the Markov bridge process, thereby significantly enhancing generation performance while maintaining parameter-efficient training. Extensive experiments on well-established benchmarks demonstrate that Bridge-IF predominantly surpasses existing baselines in sequence recovery and excels in the design of plausible proteins with high foldability. The code is available at https://github.com/violet-sto/Bridge-IF. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02120v1-abstract-full').style.display = 'none'; document.getElementById('2411.02120v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.01016">arXiv:2411.01016</a> <span> [<a href="https://arxiv.org/pdf/2411.01016">pdf</a>, <a href="https://arxiv.org/format/2411.01016">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> MoE-I$^2$: Compressing Mixture of Experts Models through Inter-Expert Pruning and Intra-Expert Low-Rank Decomposition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yang%2C+C">Cheng Yang</a>, <a href="/search/cs?searchtype=author&query=Sui%2C+Y">Yang Sui</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+J">Jinqi Xiao</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+L">Lingyi Huang</a>, <a href="/search/cs?searchtype=author&query=Gong%2C+Y">Yu Gong</a>, <a href="/search/cs?searchtype=author&query=Duan%2C+Y">Yuanlin Duan</a>, <a href="/search/cs?searchtype=author&query=Jia%2C+W">Wenqi Jia</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Miao Yin</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+Y">Yu Cheng</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+B">Bo Yuan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.01016v1-abstract-short" style="display: inline;"> The emergence of Mixture of Experts (MoE) LLMs has significantly advanced the development of language models. Compared to traditional LLMs, MoE LLMs outperform traditional LLMs by achieving higher performance with considerably fewer activated parameters. Despite this efficiency, their enormous parameter size still leads to high deployment costs. In this paper, we introduce a two-stage compression… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01016v1-abstract-full').style.display = 'inline'; document.getElementById('2411.01016v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.01016v1-abstract-full" style="display: none;"> The emergence of Mixture of Experts (MoE) LLMs has significantly advanced the development of language models. Compared to traditional LLMs, MoE LLMs outperform traditional LLMs by achieving higher performance with considerably fewer activated parameters. Despite this efficiency, their enormous parameter size still leads to high deployment costs. In this paper, we introduce a two-stage compression method tailored for MoE to reduce the model size and decrease the computational cost. First, in the inter-expert pruning stage, we analyze the importance of each layer and propose the Layer-wise Genetic Search and Block-wise KT-Reception Field with the non-uniform pruning ratio to prune the individual expert. Second, in the intra-expert decomposition stage, we apply the low-rank decomposition to further compress the parameters within the remaining experts. Extensive experiments on Qwen1.5-MoE-A2.7B, DeepSeek-V2-Lite, and Mixtral-8$\times$7B demonstrate that our proposed methods can both reduce the model size and enhance inference efficiency while maintaining performance in various zero-shot tasks. The code will be available at \url{https://github.com/xiaochengsky/MoEI-2.git} <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01016v1-abstract-full').style.display = 'none'; document.getElementById('2411.01016v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.00841">arXiv:2411.00841</a> <span> [<a href="https://arxiv.org/pdf/2411.00841">pdf</a>, <a href="https://arxiv.org/format/2411.00841">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> A Theoretical Perspective for Speculative Decoding Algorithm </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yin%2C+M">Ming Yin</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+M">Minshuo Chen</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+K">Kaixuan Huang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+M">Mengdi Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.00841v1-abstract-short" style="display: inline;"> Transformer-based autoregressive sampling has been the major bottleneck for slowing down large language model inferences. One effective way to accelerate inference is \emph{Speculative Decoding}, which employs a small model to sample a sequence of draft tokens and a large model to validate. Given its empirical effectiveness, the theoretical understanding of Speculative Decoding is falling behind.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.00841v1-abstract-full').style.display = 'inline'; document.getElementById('2411.00841v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.00841v1-abstract-full" style="display: none;"> Transformer-based autoregressive sampling has been the major bottleneck for slowing down large language model inferences. One effective way to accelerate inference is \emph{Speculative Decoding}, which employs a small model to sample a sequence of draft tokens and a large model to validate. Given its empirical effectiveness, the theoretical understanding of Speculative Decoding is falling behind. This paper tackles this gap by conceptualizing the decoding problem via markov chain abstraction and studying the key properties, \emph{output quality and inference acceleration}, from a theoretical perspective. Our analysis covers the theoretical limits of speculative decoding, batch algorithms, and output quality-inference acceleration tradeoffs. Our results reveal the fundamental connections between different components of LLMs via total variation distances and show how they jointly affect the efficiency of decoding algorithms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.00841v1-abstract-full').style.display = 'none'; document.getElementById('2411.00841v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.00078">arXiv:2411.00078</a> <span> [<a href="https://arxiv.org/pdf/2411.00078">pdf</a>, <a href="https://arxiv.org/format/2411.00078">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> How Good Are We? Evaluating Cell AI Foundation Models in Kidney Pathology with Human-in-the-Loop Enrichment </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Guo%2C+J">Junlin Guo</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+S">Siqi Lu</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+C">Can Cui</a>, <a href="/search/cs?searchtype=author&query=Deng%2C+R">Ruining Deng</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+T">Tianyuan Yao</a>, <a href="/search/cs?searchtype=author&query=Tao%2C+Z">Zhewen Tao</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+Y">Yizhe Lin</a>, <a href="/search/cs?searchtype=author&query=Lionts%2C+M">Marilyn Lionts</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Q">Quan Liu</a>, <a href="/search/cs?searchtype=author&query=Xiong%2C+J">Juming Xiong</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yu Wang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+S">Shilin Zhao</a>, <a href="/search/cs?searchtype=author&query=Chang%2C+C">Catie Chang</a>, <a href="/search/cs?searchtype=author&query=Wilkes%2C+M">Mitchell Wilkes</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Mengmeng Yin</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+H">Haichun Yang</a>, <a href="/search/cs?searchtype=author&query=Huo%2C+Y">Yuankai Huo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.00078v1-abstract-short" style="display: inline;"> Training AI foundation models has emerged as a promising large-scale learning approach for addressing real-world healthcare challenges, including digital pathology. While many of these models have been developed for tasks like disease diagnosis and tissue quantification using extensive and diverse training datasets, their readiness for deployment on some arguably simplest tasks, such as nuclei seg… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.00078v1-abstract-full').style.display = 'inline'; document.getElementById('2411.00078v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.00078v1-abstract-full" style="display: none;"> Training AI foundation models has emerged as a promising large-scale learning approach for addressing real-world healthcare challenges, including digital pathology. While many of these models have been developed for tasks like disease diagnosis and tissue quantification using extensive and diverse training datasets, their readiness for deployment on some arguably simplest tasks, such as nuclei segmentation within a single organ (e.g., the kidney), remains uncertain. This paper seeks to answer this key question, "How good are we?", by thoroughly evaluating the performance of recent cell foundation models on a curated multi-center, multi-disease, and multi-species external testing dataset. Additionally, we tackle a more challenging question, "How can we improve?", by developing and assessing human-in-the-loop data enrichment strategies aimed at enhancing model performance while minimizing the reliance on pixel-level human annotation. To address the first question, we curated a multicenter, multidisease, and multispecies dataset consisting of 2,542 kidney whole slide images (WSIs). Three state-of-the-art (SOTA) cell foundation models-Cellpose, StarDist, and CellViT-were selected for evaluation. To tackle the second question, we explored data enrichment algorithms by distilling predictions from the different foundation models with a human-in-the-loop framework, aiming to further enhance foundation model performance with minimal human efforts. Our experimental results showed that all three foundation models improved over their baselines with model fine-tuning with enriched data. Interestingly, the baseline model with the highest F1 score does not yield the best segmentation outcomes after fine-tuning. This study establishes a benchmark for the development and deployment of cell vision foundation models tailored for real-world data applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.00078v1-abstract-full').style.display = 'none'; document.getElementById('2411.00078v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.20290">arXiv:2410.20290</a> <span> [<a href="https://arxiv.org/pdf/2410.20290">pdf</a>, <a href="https://arxiv.org/format/2410.20290">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Fast Best-of-N Decoding via Speculative Rejection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sun%2C+H">Hanshi Sun</a>, <a href="/search/cs?searchtype=author&query=Haider%2C+M">Momin Haider</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Ruiqi Zhang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+H">Huitao Yang</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+J">Jiahao Qiu</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Ming Yin</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+M">Mengdi Wang</a>, <a href="/search/cs?searchtype=author&query=Bartlett%2C+P">Peter Bartlett</a>, <a href="/search/cs?searchtype=author&query=Zanette%2C+A">Andrea Zanette</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.20290v2-abstract-short" style="display: inline;"> The safe and effective deployment of Large Language Models (LLMs) involves a critical step called alignment, which ensures that the model's responses are in accordance with human preferences. Prevalent alignment techniques, such as DPO, PPO and their variants, align LLMs by changing the pre-trained model weights during a phase called post-training. While predominant, these post-training methods ad… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20290v2-abstract-full').style.display = 'inline'; document.getElementById('2410.20290v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.20290v2-abstract-full" style="display: none;"> The safe and effective deployment of Large Language Models (LLMs) involves a critical step called alignment, which ensures that the model's responses are in accordance with human preferences. Prevalent alignment techniques, such as DPO, PPO and their variants, align LLMs by changing the pre-trained model weights during a phase called post-training. While predominant, these post-training methods add substantial complexity before LLMs can be deployed. Inference-time alignment methods avoid the complex post-training step and instead bias the generation towards responses that are aligned with human preferences. The best-known inference-time alignment method, called Best-of-N, is as effective as the state-of-the-art post-training procedures. Unfortunately, Best-of-N requires vastly more resources at inference time than standard decoding strategies, which makes it computationally not viable. In this work, we introduce Speculative Rejection, a computationally-viable inference-time alignment algorithm. It generates high-scoring responses according to a given reward model, like Best-of-N does, while being between 16 to 32 times more computationally efficient. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20290v2-abstract-full').style.display = 'none'; document.getElementById('2410.20290v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.14620">arXiv:2410.14620</a> <span> [<a href="https://arxiv.org/pdf/2410.14620">pdf</a>, <a href="https://arxiv.org/format/2410.14620">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Site-Specific Outdoor Propagation Assessment and Ray-Tracing Analysis for Wireless Digital Twins </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Aram%2C+M+G">Morteza Ghaderi Aram</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+H">Hao Guo</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Mingsheng Yin</a>, <a href="/search/cs?searchtype=author&query=Svensson%2C+T">Tommy Svensson</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.14620v1-abstract-short" style="display: inline;"> Digital twinning is becoming increasingly vital in the design and real-time control of future wireless networks by providing precise cost-effective simulations, predictive insights, and real-time data integration. This paper explores the application of digital twinning in optimizing wireless communication systems within urban environments, where building arrangements can critically impact network… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14620v1-abstract-full').style.display = 'inline'; document.getElementById('2410.14620v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.14620v1-abstract-full" style="display: none;"> Digital twinning is becoming increasingly vital in the design and real-time control of future wireless networks by providing precise cost-effective simulations, predictive insights, and real-time data integration. This paper explores the application of digital twinning in optimizing wireless communication systems within urban environments, where building arrangements can critically impact network performances. We develop a digital twin platform to simulate and analyze how factors such as building positioning, base station placement, and antenna design influence wireless propagation. The ray-tracing software package of Matlab is compared with Remcom Wireless InSite. Using a realistic radiation pattern of a base transceiver station (BTS) antenna, ray tracing simulations for signal propagation and interactions in urban landscapes are then extensively examined. By analyzing radio heat maps alongside antenna patterns, we gain valuable insights into optimizing wireless deployment strategies. This study highlights the potential of digital twinning as a critical tool for urban planners and network engineers. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14620v1-abstract-full').style.display = 'none'; document.getElementById('2410.14620v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.05877">arXiv:2410.05877</a> <span> [<a href="https://arxiv.org/pdf/2410.05877">pdf</a>, <a href="https://arxiv.org/format/2410.05877">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> MDAP: A Multi-view Disentangled and Adaptive Preference Learning Framework for Cross-Domain Recommendation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tong%2C+J">Junxiong Tong</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Mingjia Yin</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hao Wang</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+Q">Qiushi Pan</a>, <a href="/search/cs?searchtype=author&query=Lian%2C+D">Defu Lian</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+E">Enhong Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.05877v1-abstract-short" style="display: inline;"> Cross-domain Recommendation systems leverage multi-domain user interactions to improve performance, especially in sparse data or new user scenarios. However, CDR faces challenges such as effectively capturing user preferences and avoiding negative transfer. To address these issues, we propose the Multi-view Disentangled and Adaptive Preference Learning (MDAP) framework. Our MDAP framework uses a m… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.05877v1-abstract-full').style.display = 'inline'; document.getElementById('2410.05877v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.05877v1-abstract-full" style="display: none;"> Cross-domain Recommendation systems leverage multi-domain user interactions to improve performance, especially in sparse data or new user scenarios. However, CDR faces challenges such as effectively capturing user preferences and avoiding negative transfer. To address these issues, we propose the Multi-view Disentangled and Adaptive Preference Learning (MDAP) framework. Our MDAP framework uses a multiview encoder to capture diverse user preferences. The framework includes a gated decoder that adaptively combines embeddings from different views to generate a comprehensive user representation. By disentangling representations and allowing adaptive feature selection, our model enhances adaptability and effectiveness. Extensive experiments on benchmark datasets demonstrate that our method significantly outperforms state-of-the-art CDR and single-domain models, providing more accurate recommendations and deeper insights into user behavior across different domains. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.05877v1-abstract-full').style.display = 'none'; document.getElementById('2410.05877v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The International Web Information Systems Engineering conference</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.04545">arXiv:2410.04545</a> <span> [<a href="https://arxiv.org/pdf/2410.04545">pdf</a>, <a href="https://arxiv.org/format/2410.04545">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> How Does the Disclosure of AI Assistance Affect the Perceptions of Writing? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhuoyan Li</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+C">Chen Liang</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+J">Jing Peng</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Ming Yin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.04545v1-abstract-short" style="display: inline;"> Recent advances in generative AI technologies like large language models have boosted the incorporation of AI assistance in writing workflows, leading to the rise of a new paradigm of human-AI co-creation in writing. To understand how people perceive writings that are produced under this paradigm, in this paper, we conduct an experimental study to understand whether and how the disclosure of the l… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04545v1-abstract-full').style.display = 'inline'; document.getElementById('2410.04545v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.04545v1-abstract-full" style="display: none;"> Recent advances in generative AI technologies like large language models have boosted the incorporation of AI assistance in writing workflows, leading to the rise of a new paradigm of human-AI co-creation in writing. To understand how people perceive writings that are produced under this paradigm, in this paper, we conduct an experimental study to understand whether and how the disclosure of the level and type of AI assistance in the writing process would affect people's perceptions of the writing on various aspects, including their evaluation on the quality of the writing and their ranking of different writings. Our results suggest that disclosing the AI assistance in the writing process, especially if AI has provided assistance in generating new content, decreases the average quality ratings for both argumentative essays and creative stories. This decrease in the average quality ratings often comes with an increased level of variations in different individuals' quality evaluations of the same writing. Indeed, factors such as an individual's writing confidence and familiarity with AI writing assistants are shown to moderate the impact of AI assistance disclosure on their writing quality evaluations. We also find that disclosing the use of AI assistance may significantly reduce the proportion of writings produced with AI's content generation assistance among the top-ranked writings. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04545v1-abstract-full').style.display = 'none'; document.getElementById('2410.04545v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">EMNLP 2024. arXiv admin note: text overlap with arXiv:2403.12004</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.04346">arXiv:2410.04346</a> <span> [<a href="https://arxiv.org/pdf/2410.04346">pdf</a>, <a href="https://arxiv.org/format/2410.04346">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Ordinal Preference Optimization: Aligning Human Preferences via NDCG </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhao%2C+Y">Yang Zhao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yixin Wang</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Mingzhang Yin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.04346v1-abstract-short" style="display: inline;"> Aligning Large Language Models (LLMs) with diverse human preferences is a pivotal technique for controlling model behaviors and enhancing generation quality. Reinforcement Learning from Human Feedback (RLHF), Direct Preference Optimization (DPO), and their variants optimize language models by pairwise comparisons. However, when multiple responses are available, these approaches fall short of lever… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04346v1-abstract-full').style.display = 'inline'; document.getElementById('2410.04346v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.04346v1-abstract-full" style="display: none;"> Aligning Large Language Models (LLMs) with diverse human preferences is a pivotal technique for controlling model behaviors and enhancing generation quality. Reinforcement Learning from Human Feedback (RLHF), Direct Preference Optimization (DPO), and their variants optimize language models by pairwise comparisons. However, when multiple responses are available, these approaches fall short of leveraging the extensive information in the ranking given by the reward models or human feedback. In this work, we propose a novel listwise approach named Ordinal Preference Optimization (OPO), which employs the Normalized Discounted Cumulative Gain (NDCG), a widely-used ranking metric, to better utilize relative proximity within ordinal multiple responses. We develop an end-to-end preference optimization algorithm by approximating NDCG with a differentiable surrogate loss. This approach builds a connection between ranking models in information retrieval and the alignment problem. In aligning multi-response datasets assigned with ordinal rewards, OPO outperforms existing pairwise and listwise approaches on evaluation sets and general benchmarks like AlpacaEval. Moreover, we demonstrate that increasing the pool of negative samples can enhance model performance by reducing the adverse effects of trivial negatives. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04346v1-abstract-full').style.display = 'none'; document.getElementById('2410.04346v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.03553">arXiv:2410.03553</a> <span> [<a href="https://arxiv.org/pdf/2410.03553">pdf</a>, <a href="https://arxiv.org/format/2410.03553">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> </div> </div> <p class="title is-5 mathjax"> Structure-Enhanced Protein Instruction Tuning: Towards General-Purpose Protein Understanding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wu%2C+W">Wei Wu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Chao Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+L">Liyi Chen</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Mingze Yin</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Y">Yiheng Zhu</a>, <a href="/search/cs?searchtype=author&query=Fu%2C+K">Kun Fu</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+J">Jieping Ye</a>, <a href="/search/cs?searchtype=author&query=Xiong%2C+H">Hui Xiong</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zheng Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.03553v2-abstract-short" style="display: inline;"> Proteins, as essential biomolecules, play a central role in biological processes, including metabolic reactions and DNA replication. Accurate prediction of their properties and functions is crucial in biological applications. Recent development of protein language models (pLMs) with supervised fine tuning provides a promising solution to this problem. However, the fine-tuned model is tailored for… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03553v2-abstract-full').style.display = 'inline'; document.getElementById('2410.03553v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.03553v2-abstract-full" style="display: none;"> Proteins, as essential biomolecules, play a central role in biological processes, including metabolic reactions and DNA replication. Accurate prediction of their properties and functions is crucial in biological applications. Recent development of protein language models (pLMs) with supervised fine tuning provides a promising solution to this problem. However, the fine-tuned model is tailored for particular downstream prediction task, and achieving general-purpose protein understanding remains a challenge. In this paper, we introduce Structure-Enhanced Protein Instruction Tuning (SEPIT) framework to bridge this gap. Our approach integrates a noval structure-aware module into pLMs to inform them with structural knowledge, and then connects these enhanced pLMs to large language models (LLMs) to generate understanding of proteins. In this framework, we propose a novel two-stage instruction tuning pipeline that first establishes a basic understanding of proteins through caption-based instructions and then refines this understanding using a mixture of experts (MoEs) to learn more complex properties and functional information with the same amount of activated parameters. Moreover, we construct the largest and most comprehensive protein instruction dataset to date, which allows us to train and evaluate the general-purpose protein understanding model. Extensive experimental results on open-ended generation and closed-set answer tasks demonstrate the superior performance of SEPIT over both closed-source general LLMs and open-source LLMs trained with protein knowledge. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03553v2-abstract-full').style.display = 'none'; document.getElementById('2410.03553v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.03126">arXiv:2410.03126</a> <span> [<a href="https://arxiv.org/pdf/2410.03126">pdf</a>, <a href="https://arxiv.org/format/2410.03126">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Understanding Decision Subjects' Engagement with and Perceived Fairness of AI Models When Opportunities of Qualification Improvement Exist </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gemalmaz%2C+M+A">Meric Altug Gemalmaz</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Ming Yin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.03126v1-abstract-short" style="display: inline;"> We explore how an AI model's decision fairness affects people's engagement with and perceived fairness of the model if they are subject to its decisions, but could repeatedly and strategically respond to these decisions. Two types of strategic responses are considered -- people could determine whether to continue interacting with the model, and whether to invest in themselves to improve their chan… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03126v1-abstract-full').style.display = 'inline'; document.getElementById('2410.03126v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.03126v1-abstract-full" style="display: none;"> We explore how an AI model's decision fairness affects people's engagement with and perceived fairness of the model if they are subject to its decisions, but could repeatedly and strategically respond to these decisions. Two types of strategic responses are considered -- people could determine whether to continue interacting with the model, and whether to invest in themselves to improve their chance of future favorable decisions from the model. Via three human-subject experiments, we found that in decision subjects' strategic, repeated interactions with an AI model, the model's decision fairness does not change their willingness to interact with the model or to improve themselves, even when the model exhibits unfairness on salient protected attributes. However, decision subjects still perceive the AI model to be less fair when it systematically biases against their group, especially if the difficulty of improving one's qualification for the favorable decision is larger for the lowly-qualified people. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.03126v1-abstract-full').style.display = 'none'; document.getElementById('2410.03126v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.18295">arXiv:2409.18295</a> <span> [<a href="https://arxiv.org/pdf/2409.18295">pdf</a>, <a href="https://arxiv.org/format/2409.18295">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> Enhancing Lossy Compression Through Cross-Field Information for Scientific Applications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Youyuan Liu</a>, <a href="/search/cs?searchtype=author&query=Jia%2C+W">Wenqi Jia</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+T">Taolue Yang</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Miao Yin</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+S">Sian Jin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.18295v1-abstract-short" style="display: inline;"> Lossy compression is one of the most effective methods for reducing the size of scientific data containing multiple data fields. It reduces information density through prediction or transformation techniques to compress the data. Previous approaches use local information from a single target field when predicting target data points, limiting their potential to achieve higher compression ratios. In… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.18295v1-abstract-full').style.display = 'inline'; document.getElementById('2409.18295v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.18295v1-abstract-full" style="display: none;"> Lossy compression is one of the most effective methods for reducing the size of scientific data containing multiple data fields. It reduces information density through prediction or transformation techniques to compress the data. Previous approaches use local information from a single target field when predicting target data points, limiting their potential to achieve higher compression ratios. In this paper, we identified significant cross-field correlations within scientific datasets. We propose a novel hybrid prediction model that utilizes CNN to extract cross-field information and combine it with existing local field information. Our solution enhances the prediction accuracy of lossy compressors, leading to improved compression ratios without compromising data quality. We evaluate our solution on three scientific datasets, demonstrating its ability to improve compression ratios by up to 25% under specific error bounds. Additionally, our solution preserves more data details and reduces artifacts compared to baseline approaches. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.18295v1-abstract-full').style.display = 'none'; document.getElementById('2409.18295v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 9 figures, accepted by DRBSD-10</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.17466">arXiv:2409.17466</a> <span> [<a href="https://arxiv.org/pdf/2409.17466">pdf</a>, <a href="https://arxiv.org/format/2409.17466">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Adjusting Regression Models for Conditional Uncertainty Calibration </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gao%2C+R">Ruijiang Gao</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Mingzhang Yin</a>, <a href="/search/cs?searchtype=author&query=McInerney%2C+J">James McInerney</a>, <a href="/search/cs?searchtype=author&query=Kallus%2C+N">Nathan Kallus</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.17466v1-abstract-short" style="display: inline;"> Conformal Prediction methods have finite-sample distribution-free marginal coverage guarantees. However, they generally do not offer conditional coverage guarantees, which can be important for high-stakes decisions. In this paper, we propose a novel algorithm to train a regression function to improve the conditional coverage after applying the split conformal prediction procedure. We establish an… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.17466v1-abstract-full').style.display = 'inline'; document.getElementById('2409.17466v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.17466v1-abstract-full" style="display: none;"> Conformal Prediction methods have finite-sample distribution-free marginal coverage guarantees. However, they generally do not offer conditional coverage guarantees, which can be important for high-stakes decisions. In this paper, we propose a novel algorithm to train a regression function to improve the conditional coverage after applying the split conformal prediction procedure. We establish an upper bound for the miscoverage gap between the conditional coverage and the nominal coverage rate and propose an end-to-end algorithm to control this upper bound. We demonstrate the efficacy of our method empirically on synthetic and real-world datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.17466v1-abstract-full').style.display = 'none'; document.getElementById('2409.17466v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Machine Learning Special Issue on Uncertainty Quantification</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.07416">arXiv:2409.07416</a> <span> [<a href="https://arxiv.org/pdf/2409.07416">pdf</a>, <a href="https://arxiv.org/format/2409.07416">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Hierarchical Reinforcement Learning for Temporal Abstraction of Listwise Recommendation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ji%2C+L">Luo Ji</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Gao Liu</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Mingyang Yin</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+H">Hongxia Yang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+J">Jingren Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.07416v1-abstract-short" style="display: inline;"> Modern listwise recommendation systems need to consider both long-term user perceptions and short-term interest shifts. Reinforcement learning can be applied on recommendation to study such a problem but is also subject to large search space, sparse user feedback and long interactive latency. Motivated by recent progress in hierarchical reinforcement learning, we propose a novel framework called m… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.07416v1-abstract-full').style.display = 'inline'; document.getElementById('2409.07416v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.07416v1-abstract-full" style="display: none;"> Modern listwise recommendation systems need to consider both long-term user perceptions and short-term interest shifts. Reinforcement learning can be applied on recommendation to study such a problem but is also subject to large search space, sparse user feedback and long interactive latency. Motivated by recent progress in hierarchical reinforcement learning, we propose a novel framework called mccHRL to provide different levels of temporal abstraction on listwise recommendation. Within the hierarchical framework, the high-level agent studies the evolution of user perception, while the low-level agent produces the item selection policy by modeling the process as a sequential decision-making problem. We argue that such framework has a well-defined decomposition of the outra-session context and the intra-session context, which are encoded by the high-level and low-level agents, respectively. To verify this argument, we implement both a simulator-based environment and an industrial dataset-based experiment. Results observe significant performance improvement by our method, compared with several well-known baselines. Data and codes have been made public. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.07416v1-abstract-full').style.display = 'none'; document.getElementById('2409.07416v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages, 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.05785">arXiv:2409.05785</a> <span> [<a href="https://arxiv.org/pdf/2409.05785">pdf</a>, <a href="https://arxiv.org/format/2409.05785">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> NeurLZ: On Enhancing Lossy Compression Performance based on Error-Controlled Neural Learning for Scientific Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jia%2C+W">Wenqi Jia</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Youyuan Liu</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+Z">Zhewen Hu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jinzhen Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+B">Boyuan Zhang</a>, <a href="/search/cs?searchtype=author&query=Niu%2C+W">Wei Niu</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+J">Junzhou Huang</a>, <a href="/search/cs?searchtype=author&query=Kalafatis%2C+S">Stavros Kalafatis</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+S">Sian Jin</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Miao Yin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.05785v3-abstract-short" style="display: inline;"> Large-scale scientific simulations generate massive datasets that pose significant challenges for storage and I/O. While traditional lossy compression techniques can improve performance, balancing compression ratio, data quality, and throughput remains difficult. To address this, we propose NeurLZ, a novel cross-field learning-based and error-controlled compression framework for scientific data. B… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.05785v3-abstract-full').style.display = 'inline'; document.getElementById('2409.05785v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.05785v3-abstract-full" style="display: none;"> Large-scale scientific simulations generate massive datasets that pose significant challenges for storage and I/O. While traditional lossy compression techniques can improve performance, balancing compression ratio, data quality, and throughput remains difficult. To address this, we propose NeurLZ, a novel cross-field learning-based and error-controlled compression framework for scientific data. By integrating skipping DNN models, cross-field learning, and error control, our framework aims to substantially enhance lossy compression performance. Our contributions are three-fold: (1) We design a lightweight skipping model to provide high-fidelity detail retention, further improving prediction accuracy. (2) We adopt a cross-field learning approach to significantly improve data prediction accuracy, resulting in a substantially improved compression ratio. (3) We develop an error control approach to provide strict error bounds according to user requirements. We evaluated NeurLZ on several real-world HPC application datasets, including Nyx (cosmological simulation), Miranda (large turbulence simulation), and Hurricane (weather simulation). Experiments demonstrate that our framework achieves up to a 90% relative reduction in bit rate under the same data distortion, compared to the best existing approach. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.05785v3-abstract-full').style.display = 'none'; document.getElementById('2409.05785v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.02416">arXiv:2409.02416</a> <span> [<a href="https://arxiv.org/pdf/2409.02416">pdf</a>, <a href="https://arxiv.org/format/2409.02416">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Relative-Translation Invariant Wasserstein Distance </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+B">Binshuai Wang</a>, <a href="/search/cs?searchtype=author&query=Di%2C+Q">Qiwei Di</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Ming Yin</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+M">Mengdi Wang</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+Q">Quanquan Gu</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+P">Peng Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.02416v1-abstract-short" style="display: inline;"> We introduce a new family of distances, relative-translation invariant Wasserstein distances ($RW_p$), for measuring the similarity of two probability distributions under distribution shift. Generalizing it from the classical optimal transport model, we show that $RW_p$ distances are also real distance metrics defined on the quotient set $\mathcal{P}_p(\mathbb{R}^n)/\sim$ and invariant to distribu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.02416v1-abstract-full').style.display = 'inline'; document.getElementById('2409.02416v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.02416v1-abstract-full" style="display: none;"> We introduce a new family of distances, relative-translation invariant Wasserstein distances ($RW_p$), for measuring the similarity of two probability distributions under distribution shift. Generalizing it from the classical optimal transport model, we show that $RW_p$ distances are also real distance metrics defined on the quotient set $\mathcal{P}_p(\mathbb{R}^n)/\sim$ and invariant to distribution translations. When $p=2$, the $RW_2$ distance enjoys more exciting properties, including decomposability of the optimal transport model, translation-invariance of the $RW_2$ distance, and a Pythagorean relationship between $RW_2$ and the classical quadratic Wasserstein distance ($W_2$). Based on these properties, we show that a distribution shift, measured by $W_2$ distance, can be explained in the bias-variance perspective. In addition, we propose a variant of the Sinkhorn algorithm, named $RW_2$ Sinkhorn algorithm, for efficiently calculating $RW_2$ distance, coupling solutions, as well as $W_2$ distance. We also provide the analysis of numerical stability and time complexity for the proposed algorithm. Finally, we validate the $RW_2$ distance metric and the algorithm performance with three experiments. We conduct one numerical validation for the $RW_2$ Sinkhorn algorithm and show two real-world applications demonstrating the effectiveness of using $RW_2$ under distribution shift: digits recognition and similar thunderstorm detection. The experimental results report that our proposed algorithm significantly improves the computational efficiency of Sinkhorn in certain practical applications, and the $RW_2$ distance is robust to distribution translations compared with baselines. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.02416v1-abstract-full').style.display = 'none'; document.getElementById('2409.02416v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.09278">arXiv:2408.09278</a> <span> [<a href="https://arxiv.org/pdf/2408.09278">pdf</a>, <a href="https://arxiv.org/format/2408.09278">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Cross-Species Data Integration for Enhanced Layer Segmentation in Kidney Pathology </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhu%2C+J">Junchao Zhu</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Mengmeng Yin</a>, <a href="/search/cs?searchtype=author&query=Deng%2C+R">Ruining Deng</a>, <a href="/search/cs?searchtype=author&query=Long%2C+Y">Yitian Long</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yu Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yaohong Wang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+S">Shilin Zhao</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+H">Haichun Yang</a>, <a href="/search/cs?searchtype=author&query=Huo%2C+Y">Yuankai Huo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.09278v1-abstract-short" style="display: inline;"> Accurate delineation of the boundaries between the renal cortex and medulla is crucial for subsequent functional structural analysis and disease diagnosis. Training high-quality deep-learning models for layer segmentation relies on the availability of large amounts of annotated data. However, due to the patient's privacy of medical data and scarce clinical cases, constructing pathological datasets… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.09278v1-abstract-full').style.display = 'inline'; document.getElementById('2408.09278v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.09278v1-abstract-full" style="display: none;"> Accurate delineation of the boundaries between the renal cortex and medulla is crucial for subsequent functional structural analysis and disease diagnosis. Training high-quality deep-learning models for layer segmentation relies on the availability of large amounts of annotated data. However, due to the patient's privacy of medical data and scarce clinical cases, constructing pathological datasets from clinical sources is relatively difficult and expensive. Moreover, using external natural image datasets introduces noise during the domain generalization process. Cross-species homologous data, such as mouse kidney data, which exhibits high structural and feature similarity to human kidneys, has the potential to enhance model performance on human datasets. In this study, we incorporated the collected private Periodic Acid-Schiff (PAS) stained mouse kidney dataset into the human kidney dataset for joint training. The results showed that after introducing cross-species homologous data, the semantic segmentation models based on CNN and Transformer architectures achieved an average increase of 1.77% and 1.24% in mIoU, and 1.76% and 0.89% in Dice score for the human renal cortex and medulla datasets, respectively. This approach is also capable of enhancing the model's generalization ability. This indicates that cross-species homologous data, as a low-noise trainable data source, can help improve model performance under conditions of limited clinical samples. Code is available at https://github.com/hrlblab/layer_segmentation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.09278v1-abstract-full').style.display = 'none'; document.getElementById('2408.09278v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.06381">arXiv:2408.06381</a> <span> [<a href="https://arxiv.org/pdf/2408.06381">pdf</a>, <a href="https://arxiv.org/format/2408.06381">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Assessment of Cell Nuclei AI Foundation Models in Kidney Pathology </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Guo%2C+J">Junlin Guo</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+S">Siqi Lu</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+C">Can Cui</a>, <a href="/search/cs?searchtype=author&query=Deng%2C+R">Ruining Deng</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+T">Tianyuan Yao</a>, <a href="/search/cs?searchtype=author&query=Tao%2C+Z">Zhewen Tao</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+Y">Yizhe Lin</a>, <a href="/search/cs?searchtype=author&query=Lionts%2C+M">Marilyn Lionts</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Q">Quan Liu</a>, <a href="/search/cs?searchtype=author&query=Xiong%2C+J">Juming Xiong</a>, <a href="/search/cs?searchtype=author&query=Chang%2C+C">Catie Chang</a>, <a href="/search/cs?searchtype=author&query=Wilkes%2C+M">Mitchell Wilkes</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Mengmeng Yin</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+H">Haichun Yang</a>, <a href="/search/cs?searchtype=author&query=Huo%2C+Y">Yuankai Huo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.06381v1-abstract-short" style="display: inline;"> Cell nuclei instance segmentation is a crucial task in digital kidney pathology. Traditional automatic segmentation methods often lack generalizability when applied to unseen datasets. Recently, the success of foundation models (FMs) has provided a more generalizable solution, potentially enabling the segmentation of any cell type. In this study, we perform a large-scale evaluation of three widely… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.06381v1-abstract-full').style.display = 'inline'; document.getElementById('2408.06381v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.06381v1-abstract-full" style="display: none;"> Cell nuclei instance segmentation is a crucial task in digital kidney pathology. Traditional automatic segmentation methods often lack generalizability when applied to unseen datasets. Recently, the success of foundation models (FMs) has provided a more generalizable solution, potentially enabling the segmentation of any cell type. In this study, we perform a large-scale evaluation of three widely used state-of-the-art (SOTA) cell nuclei foundation models (Cellpose, StarDist, and CellViT). Specifically, we created a highly diverse evaluation dataset consisting of 2,542 kidney whole slide images (WSIs) collected from both human and rodent sources, encompassing various tissue types, sizes, and staining methods. To our knowledge, this is the largest-scale evaluation of its kind to date. Our quantitative analysis of the prediction distribution reveals a persistent performance gap in kidney pathology. Among the evaluated models, CellViT demonstrated superior performance in segmenting nuclei in kidney pathology. However, none of the foundation models are perfect; a performance gap remains in general nuclei segmentation for kidney pathology. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.06381v1-abstract-full').style.display = 'none'; document.getElementById('2408.06381v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.05497">arXiv:2408.05497</a> <span> [<a href="https://arxiv.org/pdf/2408.05497">pdf</a>, <a href="https://arxiv.org/format/2408.05497">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> MABR: A Multilayer Adversarial Bias Removal Approach Without Prior Bias Knowledge </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yin%2C+M+J">Maxwell J. Yin</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+B">Boyu Wang</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+C">Charles Ling</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.05497v1-abstract-short" style="display: inline;"> Models trained on real-world data often mirror and exacerbate existing social biases. Traditional methods for mitigating these biases typically require prior knowledge of the specific biases to be addressed, such as gender or racial biases, and the social groups associated with each instance. In this paper, we introduce a novel adversarial training strategy that operates independently of prior bia… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.05497v1-abstract-full').style.display = 'inline'; document.getElementById('2408.05497v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.05497v1-abstract-full" style="display: none;"> Models trained on real-world data often mirror and exacerbate existing social biases. Traditional methods for mitigating these biases typically require prior knowledge of the specific biases to be addressed, such as gender or racial biases, and the social groups associated with each instance. In this paper, we introduce a novel adversarial training strategy that operates independently of prior bias-type knowledge and protected attribute labels. Our approach proactively identifies biases during model training by utilizing auxiliary models, which are trained concurrently by predicting the performance of the main model without relying on task labels. Additionally, we implement these auxiliary models at various levels of the feature maps of the main model, enabling the detection of a broader and more nuanced range of bias features. Through experiments on racial and gender biases in sentiment and occupation classification tasks, our method effectively reduces social biases without the need for demographic annotations. Moreover, our approach not only matches but often surpasses the efficacy of methods that require detailed demographic insights, marking a significant advancement in bias mitigation techniques. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.05497v1-abstract-full').style.display = 'none'; document.getElementById('2408.05497v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.02914">arXiv:2408.02914</a> <span> [<a href="https://arxiv.org/pdf/2408.02914">pdf</a>, <a href="https://arxiv.org/format/2408.02914">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> VirtualNexus: Enhancing 360-Degree Video AR/VR Collaboration with Environment Cutouts and Virtual Replicas </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+X">Xincheng Huang</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Michael Yin</a>, <a href="/search/cs?searchtype=author&query=Xia%2C+Z">Ziyi Xia</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+R">Robert Xiao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.02914v1-abstract-short" style="display: inline;"> Asymmetric AR/VR collaboration systems bring a remote VR user to a local AR user's physical environment, allowing them to communicate and work within a shared virtual/physical space. Such systems often display the remote environment through 3D reconstructions or 360-degree videos. While 360-degree cameras stream an environment in higher quality, they lack spatial information, making them less inte… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.02914v1-abstract-full').style.display = 'inline'; document.getElementById('2408.02914v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.02914v1-abstract-full" style="display: none;"> Asymmetric AR/VR collaboration systems bring a remote VR user to a local AR user's physical environment, allowing them to communicate and work within a shared virtual/physical space. Such systems often display the remote environment through 3D reconstructions or 360-degree videos. While 360-degree cameras stream an environment in higher quality, they lack spatial information, making them less interactable. We present VirtualNexus, an AR/VR collaboration system that enhances 360-degree video AR/VR collaboration with environment cutouts and virtual replicas. VR users can define cutouts of the remote environment to interact with as a world-in-miniature, and their interactions are synchronized to the local AR perspective. Furthermore, AR users can rapidly scan and share 3D virtual replicas of physical objects using neural rendering. We demonstrated our system's utility through 3 example applications and evaluated our system in a dyadic usability test. VirtualNexus extends the interaction space of 360-degree telepresence systems, offering improved physical presence, versatility, and clarity in interactions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.02914v1-abstract-full').style.display = 'none'; document.getElementById('2408.02914v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 10 figures, to be published in The 37th Annual ACM Symposium on User Interface Software and Technology (UIST'24)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.20172">arXiv:2407.20172</a> <span> [<a href="https://arxiv.org/pdf/2407.20172">pdf</a>, <a href="https://arxiv.org/format/2407.20172">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> LatentArtiFusion: An Effective and Efficient Histological Artifacts Restoration Framework </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=He%2C+Z">Zhenqi He</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+W">Wenrui Liu</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Minghao Yin</a>, <a href="/search/cs?searchtype=author&query=Han%2C+K">Kai Han</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.20172v1-abstract-short" style="display: inline;"> Histological artifacts pose challenges for both pathologists and Computer-Aided Diagnosis (CAD) systems, leading to errors in analysis. Current approaches for histological artifact restoration, based on Generative Adversarial Networks (GANs) and pixel-level Diffusion Models, suffer from performance limitations and computational inefficiencies. In this paper, we propose a novel framework, LatentArt… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.20172v1-abstract-full').style.display = 'inline'; document.getElementById('2407.20172v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.20172v1-abstract-full" style="display: none;"> Histological artifacts pose challenges for both pathologists and Computer-Aided Diagnosis (CAD) systems, leading to errors in analysis. Current approaches for histological artifact restoration, based on Generative Adversarial Networks (GANs) and pixel-level Diffusion Models, suffer from performance limitations and computational inefficiencies. In this paper, we propose a novel framework, LatentArtiFusion, which leverages the latent diffusion model (LDM) to reconstruct histological artifacts with high performance and computational efficiency. Unlike traditional pixel-level diffusion frameworks, LatentArtiFusion executes the restoration process in a lower-dimensional latent space, significantly improving computational efficiency. Moreover, we introduce a novel regional artifact reconstruction algorithm in latent space to prevent mistransfer in non-artifact regions, distinguishing our approach from GAN-based methods. Through extensive experiments on real-world histology datasets, LatentArtiFusion demonstrates remarkable speed, outperforming state-of-the-art pixel-level diffusion frameworks by more than 30X. It also consistently surpasses GAN-based methods by at least 5% across multiple evaluation metrics. Furthermore, we evaluate the effectiveness of our proposed framework in downstream tissue classification tasks, showcasing its practical utility. Code is available at https://github.com/bugs-creator/LatentArtiFusion. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.20172v1-abstract-full').style.display = 'none'; document.getElementById('2407.20172v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accept to DGM4MICCAI2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.19296">arXiv:2407.19296</a> <span> [<a href="https://arxiv.org/pdf/2407.19296">pdf</a>, <a href="https://arxiv.org/format/2407.19296">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Multi-Modal CLIP-Informed Protein Editing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yin%2C+M">Mingze Yin</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+H">Hanjing Zhou</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Y">Yiheng Zhu</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+M">Miao Lin</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yixuan Wu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+J">Jialu Wu</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+H">Hongxia Xu</a>, <a href="/search/cs?searchtype=author&query=Hsieh%2C+C">Chang-Yu Hsieh</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+T">Tingjun Hou</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jintai Chen</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+J">Jian Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.19296v1-abstract-short" style="display: inline;"> Proteins govern most biological functions essential for life, but achieving controllable protein discovery and optimization remains challenging. Recently, machine learning-assisted protein editing (MLPE) has shown promise in accelerating optimization cycles and reducing experimental workloads. However, current methods struggle with the vast combinatorial space of potential protein edits and cannot… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.19296v1-abstract-full').style.display = 'inline'; document.getElementById('2407.19296v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.19296v1-abstract-full" style="display: none;"> Proteins govern most biological functions essential for life, but achieving controllable protein discovery and optimization remains challenging. Recently, machine learning-assisted protein editing (MLPE) has shown promise in accelerating optimization cycles and reducing experimental workloads. However, current methods struggle with the vast combinatorial space of potential protein edits and cannot explicitly conduct protein editing using biotext instructions, limiting their interactivity with human feedback. To fill these gaps, we propose a novel method called ProtET for efficient CLIP-informed protein editing through multi-modality learning. Our approach comprises two stages: in the pretraining stage, contrastive learning aligns protein-biotext representations encoded by two large language models (LLMs), respectively. Subsequently, during the protein editing stage, the fused features from editing instruction texts and original protein sequences serve as the final editing condition for generating target protein sequences. Comprehensive experiments demonstrated the superiority of ProtET in editing proteins to enhance human-expected functionality across multiple attribute domains, including enzyme catalytic activity, protein stability and antibody specific binding ability. And ProtET improves the state-of-the-art results by a large margin, leading to significant stability improvements of 16.67% and 16.90%. This capability positions ProtET to advance real-world artificial protein editing, potentially addressing unmet academic, industrial, and clinical needs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.19296v1-abstract-full').style.display = 'none'; document.getElementById('2407.19296v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 7 figures, 5 tables</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Health Data Science, 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.18390">arXiv:2407.18390</a> <span> [<a href="https://arxiv.org/pdf/2407.18390">pdf</a>, <a href="https://arxiv.org/format/2407.18390">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Adapting Mouse Pathological Model to Human Glomerular Lesion Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yu%2C+L">Lining Yu</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Mengmeng Yin</a>, <a href="/search/cs?searchtype=author&query=Deng%2C+R">Ruining Deng</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Q">Quan Liu</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+T">Tianyuan Yao</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+C">Can Cui</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yu Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yaohong Wang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+S">Shilin Zhao</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+H">Haichun Yang</a>, <a href="/search/cs?searchtype=author&query=Huo%2C+Y">Yuankai Huo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.18390v1-abstract-short" style="display: inline;"> Moving from animal models to human applications in preclinical research encompasses a broad spectrum of disciplines in medical science. A fundamental element in the development of new drugs, treatments, diagnostic methods, and in deepening our understanding of disease processes is the accurate measurement of kidney tissues. Past studies have demonstrated the viability of translating glomeruli segm… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.18390v1-abstract-full').style.display = 'inline'; document.getElementById('2407.18390v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.18390v1-abstract-full" style="display: none;"> Moving from animal models to human applications in preclinical research encompasses a broad spectrum of disciplines in medical science. A fundamental element in the development of new drugs, treatments, diagnostic methods, and in deepening our understanding of disease processes is the accurate measurement of kidney tissues. Past studies have demonstrated the viability of translating glomeruli segmentation techniques from mouse models to human applications. Yet, these investigations tend to neglect the complexities involved in segmenting pathological glomeruli affected by different lesions. Such lesions present a wider range of morphological variations compared to healthy glomerular tissue, which are arguably more valuable than normal glomeruli in clinical practice. Furthermore, data on lesions from animal models can be more readily scaled up from disease models and whole kidney biopsies. This brings up a question: ``\textit{Can a pathological segmentation model trained on mouse models be effectively applied to human patients?}" To answer this question, we introduced GLAM, a deep learning study for fine-grained segmentation of human kidney lesions using a mouse model, addressing mouse-to-human transfer learning, by evaluating different learning strategies for segmenting human pathological lesions using zero-shot transfer learning and hybrid learning by leveraging mouse samples. From the results, the hybrid learning model achieved superior performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.18390v1-abstract-full').style.display = 'none'; document.getElementById('2407.18390v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.06645">arXiv:2407.06645</a> <span> [<a href="https://arxiv.org/pdf/2407.06645">pdf</a>, <a href="https://arxiv.org/format/2407.06645">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Entropy Law: The Story Behind Data Compression and LLM Performance </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yin%2C+M">Mingjia Yin</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+C">Chuhan Wu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yufei Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hao Wang</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+W">Wei Guo</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yasheng Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yong Liu</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+R">Ruiming Tang</a>, <a href="/search/cs?searchtype=author&query=Lian%2C+D">Defu Lian</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+E">Enhong Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.06645v3-abstract-short" style="display: inline;"> Data is the cornerstone of large language models (LLMs), but not all data is useful for model learning. Carefully selected data can better elicit the capabilities of LLMs with much less computational overhead. Most methods concentrate on evaluating the quality of individual samples in data selection, while the combinatorial effects among samples are neglected. Even if each sample is of perfect qua… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.06645v3-abstract-full').style.display = 'inline'; document.getElementById('2407.06645v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.06645v3-abstract-full" style="display: none;"> Data is the cornerstone of large language models (LLMs), but not all data is useful for model learning. Carefully selected data can better elicit the capabilities of LLMs with much less computational overhead. Most methods concentrate on evaluating the quality of individual samples in data selection, while the combinatorial effects among samples are neglected. Even if each sample is of perfect quality, their combinations may be suboptimal in teaching LLMs due to their intrinsic homogeneity or contradiction. In this paper, we aim to uncover the underlying relationships between LLM performance and data selection. Inspired by the information compression nature of LLMs, we uncover an ``entropy law'' that connects LLM performance with data compression ratio and first-epoch training loss, which reflect the information redundancy of a dataset and the mastery of inherent knowledge encoded in this dataset, respectively. Through both theoretical deduction and empirical evaluation, we find that model performance is negatively correlated to the compression ratio of training data, which usually yields a lower training loss. Based on the findings of the entropy law, we propose a quite efficient and universal data selection method named \textbf{ZIP} for training LLMs, which aim to prioritize data subsets exhibiting a low compression ratio. Based on a multi-stage algorithm that selects diverse data in a greedy manner, we can obtain a good data subset with satisfactory diversity. Extensive experiments have been conducted to validate the entropy law and the superiority of ZIP across different LLM backbones and alignment stages. We also present an interesting application of entropy law that can detect potential performance risks at the beginning of model training. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.06645v3-abstract-full').style.display = 'none'; document.getElementById('2407.06645v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.06309">arXiv:2407.06309</a> <span> [<a href="https://arxiv.org/pdf/2407.06309">pdf</a>, <a href="https://arxiv.org/format/2407.06309">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Multimodal Chain-of-Thought Reasoning via ChatGPT to Protect Children from Age-Inappropriate Apps </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hu%2C+C">Chuanbo Hu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+B">Bin Liu</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Minglei Yin</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Y">Yilu Zhou</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xin Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.06309v1-abstract-short" style="display: inline;"> Mobile applications (Apps) could expose children to inappropriate themes such as sexual content, violence, and drug use. Maturity rating offers a quick and effective method for potential users, particularly guardians, to assess the maturity levels of apps. Determining accurate maturity ratings for mobile apps is essential to protect children's health in today's saturated digital marketplace. Exist… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.06309v1-abstract-full').style.display = 'inline'; document.getElementById('2407.06309v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.06309v1-abstract-full" style="display: none;"> Mobile applications (Apps) could expose children to inappropriate themes such as sexual content, violence, and drug use. Maturity rating offers a quick and effective method for potential users, particularly guardians, to assess the maturity levels of apps. Determining accurate maturity ratings for mobile apps is essential to protect children's health in today's saturated digital marketplace. Existing approaches to maturity rating are either inaccurate (e.g., self-reported rating by developers) or costly (e.g., manual examination). In the literature, there are few text-mining-based approaches to maturity rating. However, each app typically involves multiple modalities, namely app description in the text, and screenshots in the image. In this paper, we present a framework for determining app maturity levels that utilize multimodal large language models (MLLMs), specifically ChatGPT-4 Vision. Powered by Chain-of-Thought (CoT) reasoning, our framework systematically leverages ChatGPT-4 to process multimodal app data (i.e., textual descriptions and screenshots) and guide the MLLM model through a step-by-step reasoning pathway from initial content analysis to final maturity rating determination. As a result, through explicitly incorporating CoT reasoning, our framework enables ChatGPT to understand better and apply maturity policies to facilitate maturity rating. Experimental results indicate that the proposed method outperforms all baseline models and other fusion strategies. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.06309v1-abstract-full').style.display = 'none'; document.getElementById('2407.06309v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.03307">arXiv:2407.03307</a> <span> [<a href="https://arxiv.org/pdf/2407.03307">pdf</a>, <a href="https://arxiv.org/format/2407.03307">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> HoloHisto: End-to-end Gigapixel WSI Segmentation with 4K Resolution Sequential Tokenization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tang%2C+Y">Yucheng Tang</a>, <a href="/search/cs?searchtype=author&query=He%2C+Y">Yufan He</a>, <a href="/search/cs?searchtype=author&query=Nath%2C+V">Vishwesh Nath</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+P">Pengfeig Guo</a>, <a href="/search/cs?searchtype=author&query=Deng%2C+R">Ruining Deng</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+T">Tianyuan Yao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Q">Quan Liu</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+C">Can Cui</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Mengmeng Yin</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Z">Ziyue Xu</a>, <a href="/search/cs?searchtype=author&query=Roth%2C+H">Holger Roth</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+D">Daguang Xu</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+H">Haichun Yang</a>, <a href="/search/cs?searchtype=author&query=Huo%2C+Y">Yuankai Huo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.03307v1-abstract-short" style="display: inline;"> In digital pathology, the traditional method for deep learning-based image segmentation typically involves a two-stage process: initially segmenting high-resolution whole slide images (WSI) into smaller patches (e.g., 256x256, 512x512, 1024x1024) and subsequently reconstructing them to their original scale. This method often struggles to capture the complex details and vast scope of WSIs. In this… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.03307v1-abstract-full').style.display = 'inline'; document.getElementById('2407.03307v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.03307v1-abstract-full" style="display: none;"> In digital pathology, the traditional method for deep learning-based image segmentation typically involves a two-stage process: initially segmenting high-resolution whole slide images (WSI) into smaller patches (e.g., 256x256, 512x512, 1024x1024) and subsequently reconstructing them to their original scale. This method often struggles to capture the complex details and vast scope of WSIs. In this paper, we propose the holistic histopathology (HoloHisto) segmentation method to achieve end-to-end segmentation on gigapixel WSIs, whose maximum resolution is above 80,000$\times$70,000 pixels. HoloHisto fundamentally shifts the paradigm of WSI segmentation to an end-to-end learning fashion with 1) a large (4K) resolution base patch for elevated visual information inclusion and efficient processing, and 2) a novel sequential tokenization mechanism to properly model the contextual relationships and efficiently model the rich information from the 4K input. To our best knowledge, HoloHisto presents the first holistic approach for gigapixel resolution WSI segmentation, supporting direct I/O of complete WSI and their corresponding gigapixel masks. Under the HoloHisto platform, we unveil a random 4K sampler that transcends ultra-high resolution, delivering 31 and 10 times more pixels than standard 2D and 3D patches, respectively, for advancing computational capabilities. To facilitate efficient 4K resolution dense prediction, we leverage sequential tokenization, utilizing a pre-trained image tokenizer to group image features into a discrete token grid. To assess the performance, our team curated a new kidney pathology image segmentation (KPIs) dataset with WSI-level glomeruli segmentation from whole mouse kidneys. From the results, HoloHisto-4K delivers remarkable performance gains over previous state-of-the-art models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.03307v1-abstract-full').style.display = 'none'; document.getElementById('2407.03307v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.00596">arXiv:2407.00596</a> <span> [<a href="https://arxiv.org/pdf/2407.00596">pdf</a>, <a href="https://arxiv.org/format/2407.00596">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> HATs: Hierarchical Adaptive Taxonomy Segmentation for Panoramic Pathology Image Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Deng%2C+R">Ruining Deng</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Q">Quan Liu</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+C">Can Cui</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+T">Tianyuan Yao</a>, <a href="/search/cs?searchtype=author&query=Xiong%2C+J">Juming Xiong</a>, <a href="/search/cs?searchtype=author&query=Bao%2C+S">Shunxing Bao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+H">Hao Li</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Mengmeng Yin</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yu Wang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+S">Shilin Zhao</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+Y">Yucheng Tang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+H">Haichun Yang</a>, <a href="/search/cs?searchtype=author&query=Huo%2C+Y">Yuankai Huo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.00596v1-abstract-short" style="display: inline;"> Panoramic image segmentation in computational pathology presents a remarkable challenge due to the morphologically complex and variably scaled anatomy. For instance, the intricate organization in kidney pathology spans multiple layers, from regions like the cortex and medulla to functional units such as glomeruli, tubules, and vessels, down to various cell types. In this paper, we propose a novel… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.00596v1-abstract-full').style.display = 'inline'; document.getElementById('2407.00596v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.00596v1-abstract-full" style="display: none;"> Panoramic image segmentation in computational pathology presents a remarkable challenge due to the morphologically complex and variably scaled anatomy. For instance, the intricate organization in kidney pathology spans multiple layers, from regions like the cortex and medulla to functional units such as glomeruli, tubules, and vessels, down to various cell types. In this paper, we propose a novel Hierarchical Adaptive Taxonomy Segmentation (HATs) method, which is designed to thoroughly segment panoramic views of kidney structures by leveraging detailed anatomical insights. Our approach entails (1) the innovative HATs technique which translates spatial relationships among 15 distinct object classes into a versatile "plug-and-play" loss function that spans across regions, functional units, and cells, (2) the incorporation of anatomical hierarchies and scale considerations into a unified simple matrix representation for all panoramic entities, (3) the adoption of the latest AI foundation model (EfficientSAM) as a feature extraction tool to boost the model's adaptability, yet eliminating the need for manual prompt generation in conventional segment anything model (SAM). Experimental findings demonstrate that the HATs method offers an efficient and effective strategy for integrating clinical insights and imaging precedents into a unified segmentation model across more than 15 categories. The official implementation is publicly available at https://github.com/hrlblab/HATs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.00596v1-abstract-full').style.display = 'none'; document.getElementById('2407.00596v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: text overlap with arXiv:2402.19286</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.00030">arXiv:2407.00030</a> <span> [<a href="https://arxiv.org/pdf/2407.00030">pdf</a>, <a href="https://arxiv.org/format/2407.00030">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Performance">cs.PF</span> </div> </div> <p class="title is-5 mathjax"> On Orchestrating Parallel Broadcasts for Distributed Ledgers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sheng%2C+P">Peiyao Sheng</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+C">Chenyuan Wu</a>, <a href="/search/cs?searchtype=author&query=Malkhi%2C+D">Dahlia Malkhi</a>, <a href="/search/cs?searchtype=author&query=Reiter%2C+M+K">Michael K. Reiter</a>, <a href="/search/cs?searchtype=author&query=Stathakopoulou%2C+C">Chrysoula Stathakopoulou</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+M">Michael Wei</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Maofan Yin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.00030v1-abstract-short" style="display: inline;"> This paper introduces and develops the concept of ``ticketing'', through which atomic broadcasts are orchestrated by nodes in a distributed system. The paper studies different ticketing regimes that allow parallelism, yet prevent slow nodes from hampering overall progress. It introduces a hybrid scheme which combines managed and unmanaged ticketing regimes, striking a balance between adaptivity an… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.00030v1-abstract-full').style.display = 'inline'; document.getElementById('2407.00030v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.00030v1-abstract-full" style="display: none;"> This paper introduces and develops the concept of ``ticketing'', through which atomic broadcasts are orchestrated by nodes in a distributed system. The paper studies different ticketing regimes that allow parallelism, yet prevent slow nodes from hampering overall progress. It introduces a hybrid scheme which combines managed and unmanaged ticketing regimes, striking a balance between adaptivity and resilience. The performance evaluation demonstrates how managed and unmanaged ticketing regimes benefit throughput in systems with heterogeneous resources both in static and dynamic scenarios, with the managed ticketing regime performing better among the two as it adapts better. Finally, it demonstrates how using the hybrid ticketing regime performance can enjoy both the adaptivity of the managed regime and the liveness guarantees of the unmanaged regime. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.00030v1-abstract-full').style.display = 'none'; document.getElementById('2407.00030v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.12404">arXiv:2406.12404</a> <span> [<a href="https://arxiv.org/pdf/2406.12404">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Scan-to-BIM for As-built Roads: Automatic Road Digital Twinning from Semantically Labeled Point Cloud Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ding%2C+Y">Yuexiong Ding</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Mengtian Yin</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+R">Ran Wei</a>, <a href="/search/cs?searchtype=author&query=Brilakis%2C+I">Ioannis Brilakis</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+M">Muyang Liu</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+X">Xiaowei Luo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.12404v1-abstract-short" style="display: inline;"> Creating geometric digital twins (gDT) for as-built roads still faces many challenges, such as low automation level and accuracy, limited asset types and shapes, and reliance on engineering experience. A novel scan-to-building information modeling (scan-to-BIM) framework is proposed for automatic road gDT creation based on semantically labeled point cloud data (PCD), which considers six asset type… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.12404v1-abstract-full').style.display = 'inline'; document.getElementById('2406.12404v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.12404v1-abstract-full" style="display: none;"> Creating geometric digital twins (gDT) for as-built roads still faces many challenges, such as low automation level and accuracy, limited asset types and shapes, and reliance on engineering experience. A novel scan-to-building information modeling (scan-to-BIM) framework is proposed for automatic road gDT creation based on semantically labeled point cloud data (PCD), which considers six asset types: Road Surface, Road Side (Slope), Road Lane (Marking), Road Sign, Road Light, and Guardrail. The framework first segments the semantic PCD into spatially independent instances or parts, then extracts the sectional polygon contours as their representative geometric information, stored in JavaScript Object Notation (JSON) files using a new data structure. Primitive gDTs are finally created from JSON files using corresponding conversion algorithms. The proposed method achieves an average distance error of 1.46 centimeters and a processing speed of 6.29 meters per second on six real-world road segments with a total length of 1,200 meters. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.12404v1-abstract-full').style.display = 'none'; document.getElementById('2406.12404v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.05590">arXiv:2406.05590</a> <span> [<a href="https://arxiv.org/pdf/2406.05590">pdf</a>, <a href="https://arxiv.org/format/2406.05590">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> NYU CTF Dataset: A Scalable Open-Source Benchmark Dataset for Evaluating LLMs in Offensive Security </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shao%2C+M">Minghao Shao</a>, <a href="/search/cs?searchtype=author&query=Jancheska%2C+S">Sofija Jancheska</a>, <a href="/search/cs?searchtype=author&query=Udeshi%2C+M">Meet Udeshi</a>, <a href="/search/cs?searchtype=author&query=Dolan-Gavitt%2C+B">Brendan Dolan-Gavitt</a>, <a href="/search/cs?searchtype=author&query=Xi%2C+H">Haoran Xi</a>, <a href="/search/cs?searchtype=author&query=Milner%2C+K">Kimberly Milner</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+B">Boyuan Chen</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Max Yin</a>, <a href="/search/cs?searchtype=author&query=Garg%2C+S">Siddharth Garg</a>, <a href="/search/cs?searchtype=author&query=Krishnamurthy%2C+P">Prashanth Krishnamurthy</a>, <a href="/search/cs?searchtype=author&query=Khorrami%2C+F">Farshad Khorrami</a>, <a href="/search/cs?searchtype=author&query=Karri%2C+R">Ramesh Karri</a>, <a href="/search/cs?searchtype=author&query=Shafique%2C+M">Muhammad Shafique</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.05590v2-abstract-short" style="display: inline;"> Large Language Models (LLMs) are being deployed across various domains today. However, their capacity to solve Capture the Flag (CTF) challenges in cybersecurity has not been thoroughly evaluated. To address this, we develop a novel method to assess LLMs in solving CTF challenges by creating a scalable, open-source benchmark database specifically designed for these applications. This database incl… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.05590v2-abstract-full').style.display = 'inline'; document.getElementById('2406.05590v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.05590v2-abstract-full" style="display: none;"> Large Language Models (LLMs) are being deployed across various domains today. However, their capacity to solve Capture the Flag (CTF) challenges in cybersecurity has not been thoroughly evaluated. To address this, we develop a novel method to assess LLMs in solving CTF challenges by creating a scalable, open-source benchmark database specifically designed for these applications. This database includes metadata for LLM testing and adaptive learning, compiling a diverse range of CTF challenges from popular competitions. Utilizing the advanced function calling capabilities of LLMs, we build a fully automated system with an enhanced workflow and support for external tool calls. Our benchmark dataset and automated framework allow us to evaluate the performance of five LLMs, encompassing both black-box and open-source models. This work lays the foundation for future research into improving the efficiency of LLMs in interactive cybersecurity tasks and automated task planning. By providing a specialized dataset, our project offers an ideal platform for developing, testing, and refining LLM-based approaches to vulnerability detection and resolution. Evaluating LLMs on these challenges and comparing with human performance yields insights into their potential for AI-driven cybersecurity solutions to perform real-world threat management. We make our dataset open source to public https://github.com/NYU-LLM-CTF/LLM_CTF_Database along with our playground automated framework https://github.com/NYU-LLM-CTF/llm_ctf_automation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.05590v2-abstract-full').style.display = 'none'; document.getElementById('2406.05590v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.01838">arXiv:2406.01838</a> <span> [<a href="https://arxiv.org/pdf/2406.01838">pdf</a>, <a href="https://arxiv.org/format/2406.01838">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Learning the Target Network in Function Space </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Asadi%2C+K">Kavosh Asadi</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yao Liu</a>, <a href="/search/cs?searchtype=author&query=Sabach%2C+S">Shoham Sabach</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Ming Yin</a>, <a href="/search/cs?searchtype=author&query=Fakoor%2C+R">Rasool Fakoor</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.01838v2-abstract-short" style="display: inline;"> We focus on the task of learning the value function in the reinforcement learning (RL) setting. This task is often solved by updating a pair of online and target networks while ensuring that the parameters of these two networks are equivalent. We propose Lookahead-Replicate (LR), a new value-function approximation algorithm that is agnostic to this parameter-space equivalence. Instead, the LR algo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.01838v2-abstract-full').style.display = 'inline'; document.getElementById('2406.01838v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.01838v2-abstract-full" style="display: none;"> We focus on the task of learning the value function in the reinforcement learning (RL) setting. This task is often solved by updating a pair of online and target networks while ensuring that the parameters of these two networks are equivalent. We propose Lookahead-Replicate (LR), a new value-function approximation algorithm that is agnostic to this parameter-space equivalence. Instead, the LR algorithm is designed to maintain an equivalence between the two networks in the function space. This value-based equivalence is obtained by employing a new target-network update. We show that LR leads to a convergent behavior in learning the value function. We also present empirical results demonstrating that LR-based target-network updates significantly improve deep RL on the Atari benchmark. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.01838v2-abstract-full').style.display = 'none'; document.getElementById('2406.01838v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to International Conference on Machine Learning (ICML24)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.20495">arXiv:2405.20495</a> <span> [<a href="https://arxiv.org/pdf/2405.20495">pdf</a>, <a href="https://arxiv.org/format/2405.20495">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Transfer Q Star: Principled Decoding for LLM Alignment </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chakraborty%2C+S">Souradip Chakraborty</a>, <a href="/search/cs?searchtype=author&query=Ghosal%2C+S+S">Soumya Suvra Ghosal</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Ming Yin</a>, <a href="/search/cs?searchtype=author&query=Manocha%2C+D">Dinesh Manocha</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+M">Mengdi Wang</a>, <a href="/search/cs?searchtype=author&query=Bedi%2C+A+S">Amrit Singh Bedi</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+F">Furong Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.20495v1-abstract-short" style="display: inline;"> Aligning foundation models is essential for their safe and trustworthy deployment. However, traditional fine-tuning methods are computationally intensive and require updating billions of model parameters. A promising alternative, alignment via decoding, adjusts the response distribution directly without model updates to maximize a target reward $r$, thus providing a lightweight and adaptable frame… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.20495v1-abstract-full').style.display = 'inline'; document.getElementById('2405.20495v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.20495v1-abstract-full" style="display: none;"> Aligning foundation models is essential for their safe and trustworthy deployment. However, traditional fine-tuning methods are computationally intensive and require updating billions of model parameters. A promising alternative, alignment via decoding, adjusts the response distribution directly without model updates to maximize a target reward $r$, thus providing a lightweight and adaptable framework for alignment. However, principled decoding methods rely on oracle access to an optimal Q-function ($Q^*$), which is often unavailable in practice. Hence, prior SoTA methods either approximate this $Q^*$ using $Q^{蟺_{\texttt{sft}}}$ (derived from the reference $\texttt{SFT}$ model) or rely on short-term rewards, resulting in sub-optimal decoding performance. In this work, we propose Transfer $Q^*$, which implicitly estimates the optimal value function for a target reward $r$ through a baseline model $蟻_{\texttt{BL}}$ aligned with a baseline reward $蟻_{\texttt{BL}}$ (which can be different from the target reward $r$). Theoretical analyses of Transfer $Q^*$ provide a rigorous characterization of its optimality, deriving an upper bound on the sub-optimality gap and identifying a hyperparameter to control the deviation from the pre-trained reference $\texttt{SFT}$ model based on user needs. Our approach significantly reduces the sub-optimality gap observed in prior SoTA methods and demonstrates superior empirical performance across key metrics such as coherence, diversity, and quality in extensive tests on several synthetic and real datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.20495v1-abstract-full').style.display = 'none'; document.getElementById('2405.20495v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.17795">arXiv:2405.17795</a> <span> [<a href="https://arxiv.org/pdf/2405.17795">pdf</a>, <a href="https://arxiv.org/format/2405.17795">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3637528.3671841">10.1145/3637528.3671841 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Dataset Regeneration for Sequential Recommendation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yin%2C+M">Mingjia Yin</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hao Wang</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+W">Wei Guo</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yong Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Suojuan Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+S">Sirui Zhao</a>, <a href="/search/cs?searchtype=author&query=Lian%2C+D">Defu Lian</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+E">Enhong Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.17795v3-abstract-short" style="display: inline;"> The sequential recommender (SR) system is a crucial component of modern recommender systems, as it aims to capture the evolving preferences of users. Significant efforts have been made to enhance the capabilities of SR systems. These methods typically follow the model-centric paradigm, which involves developing effective models based on fixed datasets. However, this approach often overlooks potent… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.17795v3-abstract-full').style.display = 'inline'; document.getElementById('2405.17795v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.17795v3-abstract-full" style="display: none;"> The sequential recommender (SR) system is a crucial component of modern recommender systems, as it aims to capture the evolving preferences of users. Significant efforts have been made to enhance the capabilities of SR systems. These methods typically follow the model-centric paradigm, which involves developing effective models based on fixed datasets. However, this approach often overlooks potential quality issues and flaws inherent in the data. Driven by the potential of data-centric AI, we propose a novel data-centric paradigm for developing an ideal training dataset using a model-agnostic dataset regeneration framework called DR4SR. This framework enables the regeneration of a dataset with exceptional cross-architecture generalizability. Additionally, we introduce the DR4SR+ framework, which incorporates a model-aware dataset personalizer to tailor the regenerated dataset specifically for a target model. To demonstrate the effectiveness of the data-centric paradigm, we integrate our framework with various model-centric methods and observe significant performance improvements across four widely adopted datasets. Furthermore, we conduct in-depth analyses to explore the potential of the data-centric paradigm and provide valuable insights. The code can be found at https://github.com/USTC-StarTeam/DR4SR. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.17795v3-abstract-full').style.display = 'none'; document.getElementById('2405.17795v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.16345">arXiv:2405.16345</a> <span> [<a href="https://arxiv.org/pdf/2405.16345">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Emerging Technologies">cs.ET</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Cypher4BIM: Releasing the Power of Graph for Building Knowledge Discovery </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhu%2C+J">Junxiang Zhu</a>, <a href="/search/cs?searchtype=author&query=Nisbet%2C+N">Nicholas Nisbet</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Mengtian Yin</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+R">Ran Wei</a>, <a href="/search/cs?searchtype=author&query=Brilakis%2C+I">Ioannis Brilakis</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.16345v1-abstract-short" style="display: inline;"> Graph is considered a promising way for managing building information. A new graphic form of IFC (Industry Foundation Classes) data has just been developed, referred to as IFC-Graph. However, understanding of IFC-Graph is insufficient, especially for information query. This study aims to explore graphic building information query and develop a graph query language tailored for IFC-Graph. A series… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.16345v1-abstract-full').style.display = 'inline'; document.getElementById('2405.16345v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.16345v1-abstract-full" style="display: none;"> Graph is considered a promising way for managing building information. A new graphic form of IFC (Industry Foundation Classes) data has just been developed, referred to as IFC-Graph. However, understanding of IFC-Graph is insufficient, especially for information query. This study aims to explore graphic building information query and develop a graph query language tailored for IFC-Graph. A series of tasks were carried out, including a) investigating the structure of IFC data and the main types of information in IFC, b) investigating the graph query language Cypher, and c) developing a set of tailored functional query patterns. The developed language is referred to as Cypher4BIM. Five IFC models were used for validation, and the result shows that Cypher4BIM can query individual instances and complex relations from IFC, such as spatial structure, space boundary, and space accessibility. This study contributes to applications that require effective building information query, such as digital twin. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.16345v1-abstract-full').style.display = 'none'; document.getElementById('2405.16345v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.12473">arXiv:2405.12473</a> <span> [<a href="https://arxiv.org/pdf/2405.12473">pdf</a>, <a href="https://arxiv.org/format/2405.12473">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Learning Partially Aligned Item Representation for Cross-Domain Sequential Recommendation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yin%2C+M">Mingjia Yin</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hao Wang</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+W">Wei Guo</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yong Liu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhi Li</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+S">Sirui Zhao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zhen Wang</a>, <a href="/search/cs?searchtype=author&query=Lian%2C+D">Defu Lian</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+E">Enhong Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.12473v3-abstract-short" style="display: inline;"> Cross-domain sequential recommendation (CDSR) aims to uncover and transfer users' sequential preferences across multiple recommendation domains. While significant endeavors have been made, they primarily concentrated on developing advanced transfer modules and aligning user representations using self-supervised learning techniques. However, the problem of aligning item representations has received… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.12473v3-abstract-full').style.display = 'inline'; document.getElementById('2405.12473v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.12473v3-abstract-full" style="display: none;"> Cross-domain sequential recommendation (CDSR) aims to uncover and transfer users' sequential preferences across multiple recommendation domains. While significant endeavors have been made, they primarily concentrated on developing advanced transfer modules and aligning user representations using self-supervised learning techniques. However, the problem of aligning item representations has received limited attention, and misaligned item representations can potentially lead to sub-optimal sequential modeling and user representation alignment. To this end, we propose a model-agnostic framework called \textbf{C}ross-domain item representation \textbf{A}lignment for \textbf{C}ross-\textbf{D}omain \textbf{S}equential \textbf{R}ecommendation (\textbf{CA-CDSR}), which achieves sequence-aware generation and adaptively partial alignment for item representations. Specifically, we first develop a sequence-aware feature augmentation strategy, which captures both collaborative and sequential item correlations, thus facilitating holistic item representation generation. Next, we conduct an empirical study to investigate the partial representation alignment problem from a spectrum perspective. It motivates us to devise an adaptive spectrum filter, achieving partial alignment adaptively. Furthermore, the aligned item representations can be fed into different sequential encoders to obtain user representations. The entire framework is optimized in a multi-task learning paradigm with an annealing strategy. Extensive experiments have demonstrated that CA-CDSR can surpass state-of-the-art baselines by a significant margin and can effectively align items in representation spaces to enhance performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.12473v3-abstract-full').style.display = 'none'; document.getElementById('2405.12473v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.17069">arXiv:2404.17069</a> <span> [<a href="https://arxiv.org/pdf/2404.17069">pdf</a>, <a href="https://arxiv.org/format/2404.17069">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Channel Modeling for FR3 Upper Mid-band via Generative Adversarial Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hu%2C+Y">Yaqi Hu</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Mingsheng Yin</a>, <a href="/search/cs?searchtype=author&query=Mezzavilla%2C+M">Marco Mezzavilla</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+H">Hao Guo</a>, <a href="/search/cs?searchtype=author&query=Rangan%2C+S">Sundeep Rangan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.17069v1-abstract-short" style="display: inline;"> The upper mid-band (FR3) has been recently attracting interest for new generation of mobile networks, as it provides a promising balance between spectrum availability and coverage, which are inherent limitations of the sub 6GHz and millimeter wave bands, respectively. In order to efficiently design and optimize the network, channel modeling plays a key role since FR3 systems are expected to operat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.17069v1-abstract-full').style.display = 'inline'; document.getElementById('2404.17069v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.17069v1-abstract-full" style="display: none;"> The upper mid-band (FR3) has been recently attracting interest for new generation of mobile networks, as it provides a promising balance between spectrum availability and coverage, which are inherent limitations of the sub 6GHz and millimeter wave bands, respectively. In order to efficiently design and optimize the network, channel modeling plays a key role since FR3 systems are expected to operate at multiple frequency bands. Data-driven methods, especially generative adversarial networks (GANs), can capture the intricate relationships among data samples, and provide an appropriate tool for FR3 channel modeling. In this work, we present the architecture, link state model, and path generative network of GAN-based FR3 channel modeling. The comparison of our model greatly matches the ray-tracing simulated data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.17069v1-abstract-full').style.display = 'none'; document.getElementById('2404.17069v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.13528">arXiv:2404.13528</a> <span> [<a href="https://arxiv.org/pdf/2404.13528">pdf</a>, <a href="https://arxiv.org/format/2404.13528">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3620666.3651384">10.1145/3620666.3651384 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> SmartMem: Layout Transformation Elimination and Adaptation for Efficient DNN Execution on Mobile </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Niu%2C+W">Wei Niu</a>, <a href="/search/cs?searchtype=author&query=Sanim%2C+M+M+R">Md Musfiqur Rahman Sanim</a>, <a href="/search/cs?searchtype=author&query=Shu%2C+Z">Zhihao Shu</a>, <a href="/search/cs?searchtype=author&query=Guan%2C+J">Jiexiong Guan</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+X">Xipeng Shen</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Miao Yin</a>, <a href="/search/cs?searchtype=author&query=Agrawal%2C+G">Gagan Agrawal</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+B">Bin Ren</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.13528v1-abstract-short" style="display: inline;"> This work is motivated by recent developments in Deep Neural Networks, particularly the Transformer architectures underlying applications such as ChatGPT, and the need for performing inference on mobile devices. Focusing on emerging transformers (specifically the ones with computationally efficient Swin-like architectures) and large models (e.g., Stable Diffusion and LLMs) based on transformers, w… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.13528v1-abstract-full').style.display = 'inline'; document.getElementById('2404.13528v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.13528v1-abstract-full" style="display: none;"> This work is motivated by recent developments in Deep Neural Networks, particularly the Transformer architectures underlying applications such as ChatGPT, and the need for performing inference on mobile devices. Focusing on emerging transformers (specifically the ones with computationally efficient Swin-like architectures) and large models (e.g., Stable Diffusion and LLMs) based on transformers, we observe that layout transformations between the computational operators cause a significant slowdown in these applications. This paper presents SmartMem, a comprehensive framework for eliminating most layout transformations, with the idea that multiple operators can use the same tensor layout through careful choice of layout and implementation of operations. Our approach is based on classifying the operators into four groups, and considering combinations of producer-consumer edges between the operators. We develop a set of methods for searching such layouts. Another component of our work is developing efficient memory layouts for 2.5 dimensional memory commonly seen in mobile devices. Our experimental results show that SmartMem outperforms 5 state-of-the-art DNN execution frameworks on mobile devices across 18 varied neural networks, including CNNs, Transformers with both local and global attention, as well as LLMs. In particular, compared to DNNFusion, SmartMem achieves an average speedup of 2.8$\times$, and outperforms TVM and MNN with speedups of 6.9$\times$ and 7.9$\times$, respectively, on average. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.13528v1-abstract-full').style.display = 'none'; document.getElementById('2404.13528v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.13470">arXiv:2404.13470</a> <span> [<a href="https://arxiv.org/pdf/2404.13470">pdf</a>, <a href="https://arxiv.org/format/2404.13470">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> GWLZ: A Group-wise Learning-based Lossy Compression Framework for Scientific Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jia%2C+W">Wenqi Jia</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+S">Sian Jin</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jinzhen Wang</a>, <a href="/search/cs?searchtype=author&query=Niu%2C+W">Wei Niu</a>, <a href="/search/cs?searchtype=author&query=Tao%2C+D">Dingwen Tao</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Miao Yin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.13470v1-abstract-short" style="display: inline;"> The rapid expansion of computational capabilities and the ever-growing scale of modern HPC systems present formidable challenges in managing exascale scientific data. Faced with such vast datasets, traditional lossless compression techniques prove insufficient in reducing data size to a manageable level while preserving all information intact. In response, researchers have turned to error-bounded… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.13470v1-abstract-full').style.display = 'inline'; document.getElementById('2404.13470v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.13470v1-abstract-full" style="display: none;"> The rapid expansion of computational capabilities and the ever-growing scale of modern HPC systems present formidable challenges in managing exascale scientific data. Faced with such vast datasets, traditional lossless compression techniques prove insufficient in reducing data size to a manageable level while preserving all information intact. In response, researchers have turned to error-bounded lossy compression methods, which offer a balance between data size reduction and information retention. However, despite their utility, these compressors employing conventional techniques struggle with limited reconstruction quality. To address this issue, we draw inspiration from recent advancements in deep learning and propose GWLZ, a novel group-wise learning-based lossy compression framework with multiple lightweight learnable enhancer models. Leveraging a group of neural networks, GWLZ significantly enhances the decompressed data reconstruction quality with negligible impact on the compression efficiency. Experimental results on different fields from the Nyx dataset demonstrate remarkable improvements by GWLZ, achieving up to 20% quality enhancements with negligible overhead as low as 0.0003x. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.13470v1-abstract-full').style.display = 'none'; document.getElementById('2404.13470v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.11871">arXiv:2404.11871</a> <span> [<a href="https://arxiv.org/pdf/2404.11871">pdf</a>, <a href="https://arxiv.org/format/2404.11871">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Group-On: Boosting One-Shot Segmentation with Supportive Query </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhou%2C+H">Hanjing Zhou</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Mingze Yin</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">JinTai Chen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+D">Danny Chen</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+J">Jian Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.11871v1-abstract-short" style="display: inline;"> One-shot semantic segmentation aims to segment query images given only ONE annotated support image of the same class. This task is challenging because target objects in the support and query images can be largely different in appearance and pose (i.e., intra-class variation). Prior works suggested that incorporating more annotated support images in few-shot settings boosts performances but increas… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.11871v1-abstract-full').style.display = 'inline'; document.getElementById('2404.11871v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.11871v1-abstract-full" style="display: none;"> One-shot semantic segmentation aims to segment query images given only ONE annotated support image of the same class. This task is challenging because target objects in the support and query images can be largely different in appearance and pose (i.e., intra-class variation). Prior works suggested that incorporating more annotated support images in few-shot settings boosts performances but increases costs due to additional manual labeling. In this paper, we propose a novel approach for ONE-shot semantic segmentation, called Group-On, which packs multiple query images in batches for the benefit of mutual knowledge support within the same category. Specifically, after coarse segmentation masks of the batch of queries are predicted, query-mask pairs act as pseudo support data to enhance mask predictions mutually, under the guidance of a simple Group-On Voting module. Comprehensive experiments on three standard benchmarks show that, in the ONE-shot setting, our Group-On approach significantly outperforms previous works by considerable margins. For example, on the COCO-20i dataset, we increase mIoU scores by 8.21% and 7.46% on ASNet and HSNet baselines, respectively. With only one support image, Group-On can be even competitive with the counterparts using 5 annotated support images. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.11871v1-abstract-full').style.display = 'none'; document.getElementById('2404.11871v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.04057">arXiv:2404.04057</a> <span> [<a href="https://arxiv.org/pdf/2404.04057">pdf</a>, <a href="https://arxiv.org/format/2404.04057">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Score identity Distillation: Exponentially Fast Distillation of Pretrained Diffusion Models for One-Step Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhou%2C+M">Mingyuan Zhou</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+H">Huangjie Zheng</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zhendong Wang</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Mingzhang Yin</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+H">Hai Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.04057v3-abstract-short" style="display: inline;"> We introduce Score identity Distillation (SiD), an innovative data-free method that distills the generative capabilities of pretrained diffusion models into a single-step generator. SiD not only facilitates an exponentially fast reduction in Fr茅chet inception distance (FID) during distillation but also approaches or even exceeds the FID performance of the original teacher diffusion models. By refo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.04057v3-abstract-full').style.display = 'inline'; document.getElementById('2404.04057v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.04057v3-abstract-full" style="display: none;"> We introduce Score identity Distillation (SiD), an innovative data-free method that distills the generative capabilities of pretrained diffusion models into a single-step generator. SiD not only facilitates an exponentially fast reduction in Fr茅chet inception distance (FID) during distillation but also approaches or even exceeds the FID performance of the original teacher diffusion models. By reformulating forward diffusion processes as semi-implicit distributions, we leverage three score-related identities to create an innovative loss mechanism. This mechanism achieves rapid FID reduction by training the generator using its own synthesized images, eliminating the need for real data or reverse-diffusion-based generation, all accomplished within significantly shortened generation time. Upon evaluation across four benchmark datasets, the SiD algorithm demonstrates high iteration efficiency during distillation and surpasses competing distillation approaches, whether they are one-step or few-step, data-free, or dependent on training data, in terms of generation quality. This achievement not only redefines the benchmarks for efficiency and effectiveness in diffusion distillation but also in the broader field of diffusion-based generation. The PyTorch implementation is available at https://github.com/mingyuanzhou/SiD <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.04057v3-abstract-full').style.display = 'none'; document.getElementById('2404.04057v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICML 2024, PyTorch implementation: https://github.com/mingyuanzhou/SiD</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.00268">arXiv:2404.00268</a> <span> [<a href="https://arxiv.org/pdf/2404.00268">pdf</a>, <a href="https://arxiv.org/format/2404.00268">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> A Unified Framework for Adaptive Representation Enhancement and Inversed Learning in Cross-Domain Recommendation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Luankang Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hao Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Suojuan Zhang</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Mingjia Yin</a>, <a href="/search/cs?searchtype=author&query=Han%2C+Y">Yongqiang Han</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jiaqing Zhang</a>, <a href="/search/cs?searchtype=author&query=Lian%2C+D">Defu Lian</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+E">Enhong Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.00268v1-abstract-short" style="display: inline;"> Cross-domain recommendation (CDR), aiming to extract and transfer knowledge across domains, has attracted wide attention for its efficacy in addressing data sparsity and cold-start problems. Despite significant advances in representation disentanglement to capture diverse user preferences, existing methods usually neglect representation enhancement and lack rigorous decoupling constraints, thereby… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.00268v1-abstract-full').style.display = 'inline'; document.getElementById('2404.00268v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.00268v1-abstract-full" style="display: none;"> Cross-domain recommendation (CDR), aiming to extract and transfer knowledge across domains, has attracted wide attention for its efficacy in addressing data sparsity and cold-start problems. Despite significant advances in representation disentanglement to capture diverse user preferences, existing methods usually neglect representation enhancement and lack rigorous decoupling constraints, thereby limiting the transfer of relevant information. To this end, we propose a Unified Framework for Adaptive Representation Enhancement and Inversed Learning in Cross-Domain Recommendation (AREIL). Specifically, we first divide user embeddings into domain-shared and domain-specific components to disentangle mixed user preferences. Then, we incorporate intra-domain and inter-domain information to adaptively enhance the ability of user representations. In particular, we propose a graph convolution module to capture high-order information, and a self-attention module to reveal inter-domain correlations and accomplish adaptive fusion. Next, we adopt domain classifiers and gradient reversal layers to achieve inversed representation learning in a unified framework. Finally, we employ a cross-entropy loss for measuring recommendation performance and jointly optimize the entire framework via multi-task learning. Extensive experiments on multiple datasets validate the substantial improvement in the recommendation performance of AREIL. Moreover, ablation studies and representation visualizations further illustrate the effectiveness of adaptive enhancement and inversed learning in CDR. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.00268v1-abstract-full').style.display = 'none'; document.getElementById('2404.00268v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by DASFAA 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.16812">arXiv:2403.16812</a> <span> [<a href="https://arxiv.org/pdf/2403.16812">pdf</a>, <a href="https://arxiv.org/format/2403.16812">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Towards Human-AI Deliberation: Design and Evaluation of LLM-Empowered Deliberative AI for AI-Assisted Decision-Making </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ma%2C+S">Shuai Ma</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Q">Qiaoyi Chen</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xinru Wang</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+C">Chengbo Zheng</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+Z">Zhenhui Peng</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Ming Yin</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+X">Xiaojuan Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.16812v1-abstract-short" style="display: inline;"> In AI-assisted decision-making, humans often passively review AI's suggestion and decide whether to accept or reject it as a whole. In such a paradigm, humans are found to rarely trigger analytical thinking and face difficulties in communicating the nuances of conflicting opinions to the AI when disagreements occur. To tackle this challenge, we propose Human-AI Deliberation, a novel framework to p… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.16812v1-abstract-full').style.display = 'inline'; document.getElementById('2403.16812v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.16812v1-abstract-full" style="display: none;"> In AI-assisted decision-making, humans often passively review AI's suggestion and decide whether to accept or reject it as a whole. In such a paradigm, humans are found to rarely trigger analytical thinking and face difficulties in communicating the nuances of conflicting opinions to the AI when disagreements occur. To tackle this challenge, we propose Human-AI Deliberation, a novel framework to promote human reflection and discussion on conflicting human-AI opinions in decision-making. Based on theories in human deliberation, this framework engages humans and AI in dimension-level opinion elicitation, deliberative discussion, and decision updates. To empower AI with deliberative capabilities, we designed Deliberative AI, which leverages large language models (LLMs) as a bridge between humans and domain-specific models to enable flexible conversational interactions and faithful information provision. An exploratory evaluation on a graduate admissions task shows that Deliberative AI outperforms conventional explainable AI (XAI) assistants in improving humans' appropriate reliance and task performance. Based on a mixed-methods analysis of participant behavior, perception, user experience, and open-ended feedback, we draw implications for future AI-assisted decision tool design. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.16812v1-abstract-full').style.display = 'none'; document.getElementById('2403.16812v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Yin%2C+M&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Yin%2C+M&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Yin%2C+M&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Yin%2C+M&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Yin%2C+M&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Yin%2C+M&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> </ul> </nav> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository