Search | arXiv e-print repository

<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8"/>
  <meta name="viewport" content="width=device-width, initial-scale=1"/>
  <!-- Favicons and platform-specific icon metadata. -->
  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png">
  <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png">
  <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png">
  <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest">
  <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b">
  <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico">
  <meta name="msapplication-TileColor" content="#b31b1b">
  <meta name="msapplication-config" content="images/icons/browserconfig.xml">
  <meta name="theme-color" content="#b31b1b">
  <title>Search | arXiv e-print repository</title>
  <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script>
  <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" />
  <!-- MathJax v2 configuration; it must appear before MathJax.js is loaded. -->
  <script type="text/x-mathjax-config">
    MathJax.Hub.Config({
      messageStyle: "none",
      extensions: ["tex2jax.js"],
      jax: ["input/TeX", "output/HTML-CSS"],
      tex2jax: {
        /* Inline math is delimited by $...$ or \(...\); display math by $$...$$ or \[...\]. */
        inlineMath: [ ['$','$'], ["\\(","\\)"] ],
        displayMath: [ ['$$','$$'], ["\\[","\\]"] ],
        processEscapes: true,
        /* Ignore every class by default and typeset only elements whose class matches "mathjax.*". */
        ignoreClass: '.*',
        processClass: 'mathjax.*'
      },
      TeX: {
        extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"],
        noErrors: {
          inlineDelimiters: ["$","$"],
          multiLine: false,
          style: { "font-size": "normal", "border": "" }
        }
      },
      "HTML-CSS": { availableFonts: ["TeX"] }
    });
  </script>
  <!-- Loaded over explicit HTTPS, like every other asset served from static.arxiv.org. -->
  <script src="https://static.arxiv.org/MathJax-2.7.3/MathJax.js"></script>
  <script
src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a>  <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div>  <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." 
aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div>
<div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div>
<input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div>
<div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header>
<!-- Main content; target of the "Skip to main content" link. -->
<main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 2,696 results for author: <span class="mathjax">Huang, Y</span> </h1> </div> <div class="level-right is-hidden-mobile">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div>
<!-- role="search" exposes the archive-scoped search form as a search landmark to assistive technology. -->
<div class="content"> <form method="GET" action="/search/cs" role="search"> Searching in archive <strong>cs</strong>.
<a href="/search/?searchtype=author&query=Huang%2C+Y">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Huang, Y"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Huang%2C+Y&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level 
breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Huang, Y"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div>
<!-- Pagination: aria-current="page" marks only the page being displayed (page 1, which also carries is-current); every other link is labelled "Goto page N". -->
<nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Huang%2C+Y&start=50" class="pagination-next" >Next </a> <ul class="pagination-list">
<li> <a href="/search/?searchtype=author&query=Huang%2C+Y&start=0" class="pagination-link is-current" aria-label="Page 1" aria-current="page">1 </a> </li>
<li> <a href="/search/?searchtype=author&query=Huang%2C+Y&start=50" class="pagination-link " aria-label="Goto page 2">2 </a> </li>
<li> <a href="/search/?searchtype=author&query=Huang%2C+Y&start=100" class="pagination-link " aria-label="Goto page 3">3 </a> </li>
<li> <a href="/search/?searchtype=author&query=Huang%2C+Y&start=150" class="pagination-link " aria-label="Goto page 4">4 </a> </li>
<li> <a href="/search/?searchtype=author&query=Huang%2C+Y&start=200" class="pagination-link " aria-label="Goto page 5">5 </a> </li>
<li><span class="pagination-ellipsis">…</span></li> </ul> </nav>
<!-- Ordered list of search results; numbering starts at the offset of this page. -->
<ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.10042">arXiv:2502.10042</a> <span> [<a href="https://arxiv.org/pdf/2502.10042">pdf</a>, <a
href="https://arxiv.org/format/2502.10042">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Scaling Law Tradeoff Between Throughput and Sensing Distance in Large ISAC Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qiu%2C+M">Min Qiu</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+M">Ming-Chun Lee</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yu-Chih Huang</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+J">Jinhong Yuan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.10042v1-abstract-short" style="display: inline;"> In this paper, we investigate the fundamental tradeoff between communication and sensing performance of \emph{ad hoc} integrated sensing and communication (ISAC) wireless networks. Specifically, we consider that $n$ nodes are randomly located in an extended network with area $n$ and transmit ISAC signals. Under the pure path loss channel gain model and the condition that the transmission power sca… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10042v1-abstract-full').style.display = 'inline'; document.getElementById('2502.10042v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.10042v1-abstract-full" style="display: none;"> In this paper, we investigate the fundamental tradeoff between communication and sensing performance of \emph{ad hoc} integrated sensing and communication (ISAC) wireless networks. Specifically, we consider that $n$ nodes are randomly located in an extended network with area $n$ and transmit ISAC signals. 
Under the pure path loss channel gain model and the condition that the transmission power scales according to the communication distance, we fully characterize the optimal scaling law tradeoff between throughput and sensing distance by proposing an achievable scheme and proving its converse. Our results can be interpreted as follows: by reducing the throughput by a factor of a function of $n$, the sensing range order improves according to the same function of $n$, raised to the power of the ratio between the path loss factors in communication and sensing. We prove that the same result also holds true for ISAC networks with random fading, despite the uncertainty on the connectivity and power level created by random fading. In addition, we show that the scaling law tradeoff cannot be improved by allowing the transmission power and communication distance to scale freely. To the best of our knowledge, this is the first work formally formulating and characterizing the communication and sensing performance scaling law tradeoff of \emph{ad hoc} ISAC networks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10042v1-abstract-full').style.display = 'none'; document.getElementById('2502.10042v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09967">arXiv:2502.09967</a> <span> [<a href="https://arxiv.org/pdf/2502.09967">pdf</a>, <a href="https://arxiv.org/format/2502.09967">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> VicKAM: Visual Conceptual Knowledge Guided Action Map for Weakly Supervised Group Activity Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zhuming Wang</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Yihao Zheng</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jiarui Li</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yaofei Wu</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yan Huang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zun Li</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+L">Lifang Wu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+L">Liang Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09967v1-abstract-short" style="display: inline;"> Existing weakly supervised group activity recognition methods rely on object detectors or attention mechanisms to capture key areas automatically. However, they overlook the semantic information associated with captured areas, which may adversely affect the recognition performance. 
In this paper, we propose a novel framework named Visual Conceptual Knowledge Guided Action Map (VicKAM) which effect… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09967v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09967v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09967v1-abstract-full" style="display: none;"> Existing weakly supervised group activity recognition methods rely on object detectors or attention mechanisms to capture key areas automatically. However, they overlook the semantic information associated with captured areas, which may adversely affect the recognition performance. In this paper, we propose a novel framework named Visual Conceptual Knowledge Guided Action Map (VicKAM) which effectively captures the locations of individual actions and integrates them with action semantics for weakly supervised group activity recognition.It generates individual action prototypes from training set as visual conceptual knowledge to bridge action semantics and visual representations. Guided by this knowledge, VicKAM produces action maps that indicate the likelihood of each action occurring at various locations, based on image correlation theorem. It further augments individual action maps using group activity related statistical information, representing individual action distribution under different group activities, to establish connections between action maps and specific group activities. The augmented action map is incorporated with action semantic representations for group activity recognition.Extensive experiments on two public benchmarks, the Volleyball and the NBA datasets, demonstrate the effectiveness of our proposed method, even in cases of limited training data. The code will be released later. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09967v1-abstract-full').style.display = 'none'; document.getElementById('2502.09967v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09897">arXiv:2502.09897</a> <span> [<a href="https://arxiv.org/pdf/2502.09897">pdf</a>, <a href="https://arxiv.org/format/2502.09897">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Artificial Intelligence in Spectroscopy: Advancing Chemistry from Prediction to Generation and Beyond </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Guo%2C+K">Kehan Guo</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+Y">Yili Shen</a>, <a href="/search/cs?searchtype=author&query=Gonzalez-Montiel%2C+G+A">Gisela Abigail Gonzalez-Montiel</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yue Huang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Y">Yujun Zhou</a>, <a href="/search/cs?searchtype=author&query=Surve%2C+M">Mihir Surve</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+Z">Zhichun Guo</a>, <a href="/search/cs?searchtype=author&query=Das%2C+P">Prayel Das</a>, <a href="/search/cs?searchtype=author&query=Chawla%2C+N+V">Nitesh V Chawla</a>, <a href="/search/cs?searchtype=author&query=Wiest%2C+O">Olaf Wiest</a>, <a 
href="/search/cs?searchtype=author&query=Zhang%2C+X">Xiangliang Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09897v1-abstract-short" style="display: inline;"> The rapid advent of machine learning (ML) and artificial intelligence (AI) has catalyzed major transformations in chemistry, yet the application of these methods to spectroscopic and spectrometric data, referred to as Spectroscopy Machine Learning (SpectraML), remains relatively underexplored. Modern spectroscopic techniques (MS, NMR, IR, Raman, UV-Vis) generate an ever-growing volume of high-dime… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09897v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09897v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09897v1-abstract-full" style="display: none;"> The rapid advent of machine learning (ML) and artificial intelligence (AI) has catalyzed major transformations in chemistry, yet the application of these methods to spectroscopic and spectrometric data, referred to as Spectroscopy Machine Learning (SpectraML), remains relatively underexplored. Modern spectroscopic techniques (MS, NMR, IR, Raman, UV-Vis) generate an ever-growing volume of high-dimensional data, creating a pressing need for automated and intelligent analysis beyond traditional expert-based workflows. In this survey, we provide a unified review of SpectraML, systematically examining state-of-the-art approaches for both forward tasks (molecule-to-spectrum prediction) and inverse tasks (spectrum-to-molecule inference). 
We trace the historical evolution of ML in spectroscopy, from early pattern recognition to the latest foundation models capable of advanced reasoning, and offer a taxonomy of representative neural architectures, including graph-based and transformer-based methods. Addressing key challenges such as data quality, multimodal integration, and computational scalability, we highlight emerging directions such as synthetic data generation, large-scale pretraining, and few- or zero-shot learning. To foster reproducible research, we also release an open-source repository containing recent papers and their corresponding curated datasets (https://github.com/MINE-Lab-ND/SpectrumML_Survey_Papers). Our survey serves as a roadmap for researchers, guiding progress at the intersection of spectroscopy and AI. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09897v1-abstract-full').style.display = 'none'; document.getElementById('2502.09897v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09304">arXiv:2502.09304</a> <span> [<a href="https://arxiv.org/pdf/2502.09304">pdf</a>, <a href="https://arxiv.org/format/2502.09304">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> KET-RAG: A Cost-Efficient Multi-Granular Indexing Framework for Graph-RAG </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yiqian Huang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Shiqi Zhang</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+X">Xiaokui Xiao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09304v1-abstract-short" style="display: inline;"> Graph-RAG constructs a knowledge graph from text chunks to improve retrieval in Large Language Model (LLM)-based question answering. It is particularly useful in domains such as biomedicine, law, and political science, where retrieval often requires multi-hop reasoning over proprietary documents. Some existing Graph-RAG systems construct KNN graphs based on text chunk relevance, but this coarse-gr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09304v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09304v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09304v1-abstract-full" style="display: none;"> Graph-RAG constructs a knowledge graph from text chunks to improve retrieval in Large Language Model (LLM)-based question answering. 
It is particularly useful in domains such as biomedicine, law, and political science, where retrieval often requires multi-hop reasoning over proprietary documents. Some existing Graph-RAG systems construct KNN graphs based on text chunk relevance, but this coarse-grained approach fails to capture entity relationships within texts, leading to sub-par retrieval and generation quality. To address this, recent solutions leverage LLMs to extract entities and relationships from text chunks, constructing triplet-based knowledge graphs. However, this approach incurs significant indexing costs, especially for large document collections. To ensure a good result accuracy while reducing the indexing cost, we propose KET-RAG, a multi-granular indexing framework. KET-RAG first identifies a small set of key text chunks and leverages an LLM to construct a knowledge graph skeleton. It then builds a text-keyword bipartite graph from all text chunks, serving as a lightweight alternative to a full knowledge graph. During retrieval, KET-RAG searches both structures: it follows the local search strategy of existing Graph-RAG systems on the skeleton while mimicking this search on the bipartite graph to improve retrieval quality. We evaluate eight solutions on two real-world datasets, demonstrating that KET-RAG outperforms all competitors in indexing cost, retrieval effectiveness, and generation quality. Notably, it achieves comparable or superior retrieval quality to Microsoft's Graph-RAG while reducing indexing costs by over an order of magnitude. Additionally, it improves the generation quality by up to 32.4% while lowering indexing costs by around 20%. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09304v1-abstract-full').style.display = 'none'; document.getElementById('2502.09304v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08970">arXiv:2502.08970</a> <span> [<a href="https://arxiv.org/pdf/2502.08970">pdf</a>, <a href="https://arxiv.org/format/2502.08970">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> A Decade of Metric Differential Privacy: Advancements and Applications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xie%2C+X">Xinpeng Xie</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+C">Chenyang Yu</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yan Huang</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+Y">Yang Cao</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+C">Chenxi Qiu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.08970v1-abstract-short" style="display: inline;"> Metric Differential Privacy (mDP) builds upon the core principles of Differential Privacy (DP) by incorporating various distance metrics, which offer adaptable and context-sensitive privacy guarantees for a wide range of applications, such as location-based services, text analysis, and image processing. 
Since its inception in 2013, mDP has garnered substantial research attention, advancing theoret… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08970v1-abstract-full').style.display = 'inline'; document.getElementById('2502.08970v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.08970v1-abstract-full" style="display: none;"> Metric Differential Privacy (mDP) builds upon the core principles of Differential Privacy (DP) by incorporating various distance metrics, which offer adaptable and context-sensitive privacy guarantees for a wide range of applications, such as location-based services, text analysis, and image processing. Since its inception in 2013, mDP has garnered substantial research attention, advancing theoretical foundations, algorithm design, and practical implementations. Despite this progress, existing surveys mainly focus on traditional DP and local DP, and they provide limited coverage of mDP. This paper provides a comprehensive survey of mDP research from 2013 to 2024, tracing its development from the foundations of DP. We categorize essential mechanisms, including Laplace, Exponential, and optimization-based approaches, and assess their strengths, limitations, and application domains. Additionally, we highlight key challenges and outline future research directions to encourage innovation and real-world adoption of mDP. This survey is designed to be a valuable resource for researchers and practitioners aiming to deepen their understanding and drive progress in mDP within the broader privacy ecosystem. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08970v1-abstract-full').style.display = 'none'; document.getElementById('2502.08970v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08958">arXiv:2502.08958</a> <span> [<a href="https://arxiv.org/pdf/2502.08958">pdf</a>, <a href="https://arxiv.org/format/2502.08958">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Biologically Plausible Brain Graph Transformer </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Peng%2C+C">Ciyuan Peng</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yuelong Huang</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+Q">Qichao Dong</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+S">Shuo Yu</a>, <a href="/search/cs?searchtype=author&query=Xia%2C+F">Feng Xia</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+C">Chengqi Zhang</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+Y">Yaochu Jin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.08958v1-abstract-short" style="display: inline;"> State-of-the-art brain graph analysis methods fail to fully encode the small-world architecture of brain graphs 
(accompanied by the presence of hubs and functional modules), and therefore lack biological plausibility to some extent. This limitation hinders their ability to accurately represent the brain's structural and functional properties, thereby restricting the effectiveness of machine learni… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08958v1-abstract-full').style.display = 'inline'; document.getElementById('2502.08958v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.08958v1-abstract-full" style="display: none;"> State-of-the-art brain graph analysis methods fail to fully encode the small-world architecture of brain graphs (accompanied by the presence of hubs and functional modules), and therefore lack biological plausibility to some extent. This limitation hinders their ability to accurately represent the brain's structural and functional properties, thereby restricting the effectiveness of machine learning models in tasks such as brain disorder detection. In this work, we propose a novel Biologically Plausible Brain Graph Transformer (BioBGT) that encodes the small-world architecture inherent in brain graphs. Specifically, we present a network entanglement-based node importance encoding technique that captures the structural importance of nodes in global information propagation during brain graph communication, highlighting the biological properties of the brain structure. Furthermore, we introduce a functional module-aware self-attention to preserve the functional segregation and integration characteristics of brain graphs in the learned representations. 
Experimental results on three benchmark datasets demonstrate that BioBGT outperforms state-of-the-art models, enhancing biologically plausible brain graph representations for various brain graph analytical tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08958v1-abstract-full').style.display = 'none'; document.getElementById('2502.08958v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">27 pages, 16 figures, published as a conference paper at ICLR 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08803">arXiv:2502.08803</a> <span> [<a href="https://arxiv.org/pdf/2502.08803">pdf</a>, <a href="https://arxiv.org/format/2502.08803">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Deep EEG Super-Resolution: Upsampling EEG Spatial Resolution with Generative Adversarial Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Corley%2C+I">Isaac Corley</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yufei Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.08803v1-abstract-short" style="display: inline;"> Electroencephalography (EEG) activity contains a wealth of information about what is 
happening within the human brain. Recording more of this data has the potential to unlock endless future applications. However, the cost of EEG hardware is increasingly expensive based upon the number of EEG channels being recorded simultaneously. We combat this problem in this paper by proposing a novel deep EEG… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08803v1-abstract-full').style.display = 'inline'; document.getElementById('2502.08803v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.08803v1-abstract-full" style="display: none;"> Electroencephalography (EEG) activity contains a wealth of information about what is happening within the human brain. Recording more of this data has the potential to unlock endless future applications. However, the cost of EEG hardware is increasingly expensive based upon the number of EEG channels being recorded simultaneously. We combat this problem in this paper by proposing a novel deep EEG super-resolution (SR) approach based on Generative Adversarial Networks (GANs). This approach can produce high spatial resolution EEG data from low resolution samples, by generating channel-wise upsampled data to effectively interpolate numerous missing channels, thus reducing the need for expensive EEG equipment. We tested the performance using an EEG dataset from a mental imagery task. Our proposed GAN model provided 10^4 fold and 10^2 fold reduction in mean-squared error (MSE) and mean-absolute error (MAE), respectively, over the baseline bicubic interpolation method. We further validate our method by training a classifier on the original classification task, which displayed minimal loss in accuracy while using the super-resolved data. The proposed SR EEG by GAN is a promising approach to improve the spatial resolution of low density EEG headsets. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08803v1-abstract-full').style.display = 'none'; document.getElementById('2502.08803v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08374">arXiv:2502.08374</a> <span> [<a href="https://arxiv.org/pdf/2502.08374">pdf</a>, <a href="https://arxiv.org/format/2502.08374">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> AdvSwap: Covert Adversarial Perturbation with High Frequency Info-swapping for Autonomous Driving Perception </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yuanhao Huang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Q">Qinfan Zhang</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+J">Jiandong Xing</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+M">Mengyue Cheng</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+H">Haiyang Yu</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+Y">Yilong Ren</a>, <a href="/search/cs?searchtype=author&query=Xiong%2C+X">Xiao Xiong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.08374v1-abstract-short" style="display: inline;"> Perception module of Autonomous vehicles (AVs) are increasingly susceptible to be attacked, which exploit vulnerabilities in 
neural networks through adversarial inputs, thereby compromising the AI safety. Some research focuses on creating covert adversarial samples, but existing global noise techniques are detectable and difficult to deceive the human visual system. This paper introduces a novel a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08374v1-abstract-full').style.display = 'inline'; document.getElementById('2502.08374v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.08374v1-abstract-full" style="display: none;"> Perception module of Autonomous vehicles (AVs) are increasingly susceptible to be attacked, which exploit vulnerabilities in neural networks through adversarial inputs, thereby compromising the AI safety. Some research focuses on creating covert adversarial samples, but existing global noise techniques are detectable and difficult to deceive the human visual system. This paper introduces a novel adversarial attack method, AdvSwap, which creatively utilizes wavelet-based high-frequency information swapping to generate covert adversarial samples and fool the camera. AdvSwap employs an invertible neural network for selective high-frequency information swapping, preserving both forward propagation and data integrity. The scheme effectively removes the original label data and incorporates the guidance image data, producing concealed and robust adversarial samples. Experimental evaluations and comparisons on the GTSRB and nuScenes datasets demonstrate that AdvSwap can make concealed attacks on common traffic targets. The generated adversarial samples are also difficult to perceive by humans and algorithms. Meanwhile, the method has strong attacking robustness and attacking transferability. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08374v1-abstract-full').style.display = 'none'; document.getElementById('2502.08374v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">27th IEEE International Conference on Intelligent Transportation Systems (ITSC)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07904">arXiv:2502.07904</a> <span> [<a href="https://arxiv.org/pdf/2502.07904">pdf</a>, <a href="https://arxiv.org/format/2502.07904">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Intelligent Legal Assistant: An Interactive Clarification System for Legal Question Answering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yao%2C+R">Rujing Yao</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yiquan Wu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+T">Tong Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xuhui Zhang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yuting Huang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yang Wu</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+J">Jiayin Yang</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+C">Changlong Sun</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+F">Fang Wang</a>, <a 
href="/search/cs?searchtype=author&query=Liu%2C+X">Xiaozhong Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07904v1-abstract-short" style="display: inline;"> The rise of large language models has opened new avenues for users seeking legal advice. However, users often lack professional legal knowledge, which can lead to questions that omit critical information. This deficiency makes it challenging for traditional legal question-answering systems to accurately identify users' actual needs, often resulting in imprecise or generalized advice. In this work,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07904v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07904v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07904v1-abstract-full" style="display: none;"> The rise of large language models has opened new avenues for users seeking legal advice. However, users often lack professional legal knowledge, which can lead to questions that omit critical information. This deficiency makes it challenging for traditional legal question-answering systems to accurately identify users' actual needs, often resulting in imprecise or generalized advice. In this work, we develop a legal question-answering system called Intelligent Legal Assistant, which interacts with users to precisely capture their needs. When a user poses a question, the system requests that the user select their geographical location to pinpoint the applicable laws. It then generates clarifying questions and options based on the key information missing from the user's initial question. This allows the user to select and provide the necessary details. 
Once all necessary information is provided, the system produces an in-depth legal analysis encompassing three aspects: overall conclusion, jurisprudential analysis, and resolution suggestions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07904v1-abstract-full').style.display = 'none'; document.getElementById('2502.07904v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07707">arXiv:2502.07707</a> <span> [<a href="https://arxiv.org/pdf/2502.07707">pdf</a>, <a href="https://arxiv.org/format/2502.07707">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> PRVQL: Progressive Knowledge-guided Refinement for Robust Egocentric Visual Query Localization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Fan%2C+B">Bing Fan</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+Y">Yunhe Feng</a>, <a href="/search/cs?searchtype=author&query=Tian%2C+Y">Yapeng Tian</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+Y">Yuewei Lin</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yan Huang</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+H">Heng Fan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07707v1-abstract-short" style="display: inline;"> Egocentric visual query localization 
(EgoVQL) focuses on localizing the target of interest in space and time from first-person videos, given a visual query. Despite recent progress, existing methods often struggle to handle severe object appearance changes and cluttering background in the video due to lacking sufficient target cues, leading to degradation. Addressing this, we introduce PRVQL, a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07707v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07707v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07707v1-abstract-full" style="display: none;"> Egocentric visual query localization (EgoVQL) focuses on localizing the target of interest in space and time from first-person videos, given a visual query. Despite recent progress, existing methods often struggle to handle severe object appearance changes and cluttering background in the video due to lacking sufficient target cues, leading to degradation. Addressing this, we introduce PRVQL, a novel Progressive knowledge-guided Refinement framework for EgoVQL. The core is to continuously exploit target-relevant knowledge directly from videos and utilize it as guidance to refine both query and video features for improving target localization. Our PRVQL contains multiple processing stages. The target knowledge from one stage, comprising appearance and spatial knowledge extracted via two specially designed knowledge learning modules, is utilized as guidance to refine the query and video features for the next stage, which are used to generate more accurate knowledge for further feature refinement. With such a progressive process, target knowledge in PRVQL can be gradually improved, which, in turn, leads to better refined query and video features for localization in the final stage. 
Compared to previous methods, our PRVQL, besides the given object cues, enjoys additional crucial target information from a video as guidance to refine features, and hence enhances EgoVQL in complicated scenes. In our experiments on challenging Ego4D, PRVQL achieves state-of-the-art result and largely surpasses other methods, showing its efficacy. Our code, model and results will be released at https://github.com/fb-reps/PRVQL. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07707v1-abstract-full').style.display = 'none'; document.getElementById('2502.07707v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07243">arXiv:2502.07243</a> <span> [<a href="https://arxiv.org/pdf/2502.07243">pdf</a>, <a href="https://arxiv.org/format/2502.07243">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Vevo: Controllable Zero-Shot Voice Imitation with Self-Supervised Disentanglement </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xueyao Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xiaohui Zhang</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+K">Kainan Peng</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+Z">Zhenyu Tang</a>, <a href="/search/cs?searchtype=author&query=Manohar%2C+V">Vimal Manohar</a>, <a 
href="/search/cs?searchtype=author&query=Liu%2C+Y">Yingru Liu</a>, <a href="/search/cs?searchtype=author&query=Hwang%2C+J">Jeff Hwang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+D">Dangna Li</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yuhao Wang</a>, <a href="/search/cs?searchtype=author&query=Chan%2C+J">Julian Chan</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yuan Huang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z">Zhizheng Wu</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+M">Mingbo Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07243v1-abstract-short" style="display: inline;"> The imitation of voice, targeted on specific speech attributes such as timbre and speaking style, is crucial in speech generation. However, existing methods rely heavily on annotated data, and struggle with effectively disentangling timbre and style, leading to challenges in achieving controllable generation, especially in zero-shot scenarios. To address these issues, we propose Vevo, a versatile… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07243v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07243v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07243v1-abstract-full" style="display: none;"> The imitation of voice, targeted on specific speech attributes such as timbre and speaking style, is crucial in speech generation. However, existing methods rely heavily on annotated data, and struggle with effectively disentangling timbre and style, leading to challenges in achieving controllable generation, especially in zero-shot scenarios. 
To address these issues, we propose Vevo, a versatile zero-shot voice imitation framework with controllable timbre and style. Vevo operates in two core stages: (1) Content-Style Modeling: Given either text or speech's content tokens as input, we utilize an autoregressive transformer to generate the content-style tokens, which is prompted by a style reference; (2) Acoustic Modeling: Given the content-style tokens as input, we employ a flow-matching transformer to produce acoustic representations, which is prompted by a timbre reference. To obtain the content and content-style tokens of speech, we design a fully self-supervised approach that progressively decouples the timbre, style, and linguistic content of speech. Specifically, we adopt VQ-VAE as the tokenizer for the continuous hidden features of HuBERT. We treat the vocabulary size of the VQ-VAE codebook as the information bottleneck, and adjust it carefully to obtain the disentangled speech representations. Solely self-supervised trained on 60K hours of audiobook speech data, without any fine-tuning on style-specific corpora, Vevo matches or surpasses existing methods in accent and emotion conversion tasks. Additionally, Vevo's effectiveness in zero-shot voice conversion and text-to-speech tasks further demonstrates its strong generalization and versatility. Audio samples are available at https://versavoice.github.io. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07243v1-abstract-full').style.display = 'none'; document.getElementById('2502.07243v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ICLR 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07194">arXiv:2502.07194</a> <span> [<a href="https://arxiv.org/pdf/2502.07194">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.3390/electronics13122312">10.3390/electronics13122312 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Dense Object Detection Based on De-homogenized Queries </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yueming Huang</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+C">Chenrui Ma</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+H">Hao Zhou</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+H">Hao Wu</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+G">Guowu Yuan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07194v1-abstract-short" style="display: inline;"> Dense object detection is widely used in automatic driving, video surveillance, and other fields. This paper focuses on the challenging task of dense object detection. 
Currently, detection methods based on greedy algorithms, such as non-maximum suppression (NMS), often produce many repetitive predictions or missed detections in dense scenarios, which is a common problem faced by NMS-based algorith… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07194v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07194v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07194v1-abstract-full" style="display: none;"> Dense object detection is widely used in automatic driving, video surveillance, and other fields. This paper focuses on the challenging task of dense object detection. Currently, detection methods based on greedy algorithms, such as non-maximum suppression (NMS), often produce many repetitive predictions or missed detections in dense scenarios, which is a common problem faced by NMS-based algorithms. Through the end-to-end DETR (DEtection TRansformer), as a type of detector that can incorporate the post-processing de-duplication capability of NMS, etc., into the network, we found that homogeneous queries in the query-based detector lead to a reduction in the de-duplication capability of the network and the learning efficiency of the encoder, resulting in duplicate prediction and missed detection problems. To solve this problem, we propose learnable differentiated encoding to de-homogenize the queries, and at the same time, queries can communicate with each other via differentiated encoding information, replacing the previous self-attention among the queries. In addition, we used joint loss on the output of the encoder that considered both location and confidence prediction to give a higher-quality initialization for queries. 
Without cumbersome decoder stacking and guaranteeing accuracy, our proposed end-to-end detection framework was more concise and reduced the number of parameters by about 8% compared to deformable DETR. Our method achieved excellent results on the challenging CrowdHuman dataset with 93.6% average precision (AP), 39.2% MR-2, and 84.3% JI. The performance surpassed that of previous SOTA methods, such as Iter-E2EDet (Progressive End-to-End Object Detection) and MIP (One proposal, Multiple predictions). In addition, our method is more robust in various scenarios with different densities. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07194v1-abstract-full').style.display = 'none'; document.getElementById('2502.07194v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 15 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06913">arXiv:2502.06913</a> <span> [<a href="https://arxiv.org/pdf/2502.06913">pdf</a>, <a href="https://arxiv.org/format/2502.06913">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> A Simple yet Effective DDG Predictor is An Unsupervised Antibody Optimizer and Explainer </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wu%2C+L">Lirong Wu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yunfan Liu</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+H">Haitao Lin</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yufei Huang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+G">Guojiang Zhao</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+Z">Zhifeng Gao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S+Z">Stan Z. Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06913v2-abstract-short" style="display: inline;"> The proteins that exist today have been optimized over billions of years of natural evolution, during which nature creates random mutations and selects them. 
The discovery of functionally promising mutations is challenged by the limited evolutionary accessible regions, i.e., only a small region on the fitness landscape is beneficial. There have been numerous priors used to constrain protein evolut… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06913v2-abstract-full').style.display = 'inline'; document.getElementById('2502.06913v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06913v2-abstract-full" style="display: none;"> The proteins that exist today have been optimized over billions of years of natural evolution, during which nature creates random mutations and selects them. The discovery of functionally promising mutations is challenged by the limited evolutionary accessible regions, i.e., only a small region on the fitness landscape is beneficial. There have been numerous priors used to constrain protein evolution to regions of landscapes with high-fitness variants, among which the change in binding free energy (DDG) of protein complexes upon mutations is one of the most commonly used priors. However, the huge mutation space poses two challenges: (1) how to improve the efficiency of DDG prediction for fast mutation screening; and (2) how to explain mutation preferences and efficiently explore accessible evolutionary regions. To address these challenges, we propose a lightweight DDG predictor (Light-DDG), which adopts a structure-aware Transformer as the backbone and enhances it by knowledge distilled from existing powerful but computationally heavy DDG predictors. Additionally, we augmented, annotated, and released a large-scale dataset containing millions of mutation data for pre-training Light-DDG. We find that such a simple yet effective Light-DDG can serve as a good unsupervised antibody optimizer and explainer. 
For the target antibody, we propose a novel Mutation Explainer to learn mutation preferences, which accounts for the marginal benefit of each mutation per residue. To further explore accessible evolutionary regions, we conduct preference-guided antibody optimization and evaluate antibody candidates quickly using Light-DDG to identify desirable mutations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06913v2-abstract-full').style.display = 'none'; document.getElementById('2502.06913v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06888">arXiv:2502.06888</a> <span> [<a href="https://arxiv.org/pdf/2502.06888">pdf</a>, <a href="https://arxiv.org/format/2502.06888">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Klotski: Efficient Mixture-of-Expert Inference via Expert-Aware Multi-Batch Pipeline </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Fang%2C+Z">Zhiyuan Fang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yuegui Huang</a>, <a href="/search/cs?searchtype=author&query=Hong%2C+Z">Zicong Hong</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+Y">Yufeng Lyu</a>, <a 
href="/search/cs?searchtype=author&query=Chen%2C+W">Wuhui Chen</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+Y">Yue Yu</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+F">Fan Yu</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Z">Zibin Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06888v1-abstract-short" style="display: inline;"> Mixture of Experts (MoE), with its distinctive sparse structure, enables the scaling of language models up to trillions of parameters without significantly increasing computational costs. However, the substantial parameter size presents a challenge for inference, as the expansion in GPU memory cannot keep pace with the growth in parameters. Although offloading techniques utilise memory from the CP… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06888v1-abstract-full').style.display = 'inline'; document.getElementById('2502.06888v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06888v1-abstract-full" style="display: none;"> Mixture of Experts (MoE), with its distinctive sparse structure, enables the scaling of language models up to trillions of parameters without significantly increasing computational costs. However, the substantial parameter size presents a challenge for inference, as the expansion in GPU memory cannot keep pace with the growth in parameters. Although offloading techniques utilise memory from the CPU and disk and parallelise the I/O and computation for efficiency, the computation for each expert in MoE models is often less than the I/O, resulting in numerous bubbles in the pipeline. 
Therefore, we propose Klotski, an efficient MoE inference engine that significantly reduces pipeline bubbles through a novel expert-aware multi-batch pipeline paradigm. The proposed paradigm uses batch processing to extend the computation time of the current layer to overlap with the loading time of the next layer. Although this idea has been effectively applied to dense models, more batches may activate more experts in the MoE, leading to longer loading times and more bubbles. Thus, unlike traditional approaches, we balance computation and I/O time and minimise bubbles by orchestrating their inference orders based on their heterogeneous computation and I/O requirements and activation patterns under different batch numbers. Moreover, to adapt to different hardware environments and models, we design a constraint-sensitive I/O-compute planner and a correlation-aware expert prefetcher for a schedule that minimises pipeline bubbles. Experimental results demonstrate that Klotski achieves a superior throughput-latency trade-off compared to state-of-the-art techniques, with throughput improvements of up to 85.12x. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06888v1-abstract-full').style.display = 'none'; document.getElementById('2502.06888v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06805">arXiv:2502.06805</a> <span> [<a href="https://arxiv.org/pdf/2502.06805">pdf</a>, <a href="https://arxiv.org/format/2502.06805">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> </div> </div> <p class="title is-5 mathjax"> Efficient Diffusion Models: A Survey </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shen%2C+H">Hui Shen</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jingxuan Zhang</a>, <a href="/search/cs?searchtype=author&query=Xiong%2C+B">Boning Xiong</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+R">Rui Hu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+S">Shoufa Chen</a>, <a href="/search/cs?searchtype=author&query=Wan%2C+Z">Zhongwei Wan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xin Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yu Zhang</a>, <a href="/search/cs?searchtype=author&query=Gong%2C+Z">Zixuan Gong</a>, <a href="/search/cs?searchtype=author&query=Bao%2C+G">Guangyin Bao</a>, <a href="/search/cs?searchtype=author&query=Tao%2C+C">Chaofan Tao</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yongfeng Huang</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+Y">Ye Yuan</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+M">Mi Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06805v1-abstract-short" style="display: inline;"> Diffusion models have emerged as powerful generative models capable of producing high-quality contents 
such as images, videos, and audio, demonstrating their potential to revolutionize digital content creation. However, these capabilities come at the cost of their significant computational resources and lengthy generation time, underscoring the critical need to develop efficient techniques for pra… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06805v1-abstract-full').style.display = 'inline'; document.getElementById('2502.06805v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06805v1-abstract-full" style="display: none;"> Diffusion models have emerged as powerful generative models capable of producing high-quality contents such as images, videos, and audio, demonstrating their potential to revolutionize digital content creation. However, these capabilities come at the cost of their significant computational resources and lengthy generation time, underscoring the critical need to develop efficient techniques for practical deployment. In this survey, we provide a systematic and comprehensive review of research on efficient diffusion models. We organize the literature in a taxonomy consisting of three main categories, covering distinct yet interconnected efficient diffusion model topics from algorithm-level, system-level, and framework perspective, respectively. We have also created a GitHub repository where we organize the papers featured in this survey at https://github.com/AIoT-MLSys-Lab/Efficient-Diffusion-Model-Survey. We hope our survey can serve as a valuable resource to help researchers and practitioners gain a systematic understanding of efficient diffusion model research and inspire them to contribute to this important and exciting field. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06805v1-abstract-full').style.display = 'none'; document.getElementById('2502.06805v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06734">arXiv:2502.06734</a> <span> [<a href="https://arxiv.org/pdf/2502.06734">pdf</a>, <a href="https://arxiv.org/format/2502.06734">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Señorita-2M: A High-Quality Instruction-based Dataset for General Video Editing by Video Specialists </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zi%2C+B">Bojia Zi</a>, <a href="/search/cs?searchtype=author&query=Ruan%2C+P">Penghui Ruan</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+M">Marco Chen</a>, <a href="/search/cs?searchtype=author&query=Qi%2C+X">Xianbiao Qi</a>, <a href="/search/cs?searchtype=author&query=Hao%2C+S">Shaozhe Hao</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+S">Shihao Zhao</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Youze Huang</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+B">Bin Liang</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+R">Rong Xiao</a>, <a href="/search/cs?searchtype=author&query=Wong%2C+K">Kam-Fai Wong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark 
mathjax" id="2502.06734v1-abstract-short" style="display: inline;"> Recent advancements in video generation have spurred the development of video editing techniques, which can be divided into inversion-based and end-to-end methods. However, current video editing methods still suffer from several challenges. Inversion-based methods, though training-free and flexible, are time-consuming during inference, struggle with fine-grained editing instructions, and produce a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06734v1-abstract-full').style.display = 'inline'; document.getElementById('2502.06734v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06734v1-abstract-full" style="display: none;"> Recent advancements in video generation have spurred the development of video editing techniques, which can be divided into inversion-based and end-to-end methods. However, current video editing methods still suffer from several challenges. Inversion-based methods, though training-free and flexible, are time-consuming during inference, struggle with fine-grained editing instructions, and produce artifacts and jitter. On the other hand, end-to-end methods, which rely on edited video pairs for training, offer faster inference speeds but often produce poor editing results due to a lack of high-quality training video pairs. In this paper, to close the gap in end-to-end methods, we introduce Señorita-2M, a high-quality video editing dataset. Señorita-2M consists of approximately 2 millions of video editing pairs. It is built by crafting four high-quality, specialized video editing models, each crafted and trained by our team to achieve state-of-the-art editing results. We also propose a filtering pipeline to eliminate poorly edited video pairs. 
Furthermore, we explore common video editing architectures to identify the most effective structure based on current pre-trained generative model. Extensive experiments show that our dataset can help to yield remarkably high-quality video editing results. More details are available at https://senorita.github.io. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06734v1-abstract-full').style.display = 'none'; document.getElementById('2502.06734v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06453">arXiv:2502.06453</a> <span> [<a href="https://arxiv.org/pdf/2502.06453">pdf</a>, <a href="https://arxiv.org/format/2502.06453">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> MATH-Perturb: Benchmarking LLMs' Math Reasoning Abilities against Hard Perturbations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+K">Kaixuan Huang</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+J">Jiacheng Guo</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zihao Li</a>, <a href="/search/cs?searchtype=author&query=Ji%2C+X">Xiang Ji</a>, <a href="/search/cs?searchtype=author&query=Ge%2C+J">Jiawei Ge</a>, <a 
href="/search/cs?searchtype=author&query=Li%2C+W">Wenzhe Li</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+Y">Yingqing Guo</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+T">Tianle Cai</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+H">Hui Yuan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+R">Runzhe Wang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Y">Yue Wu</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Ming Yin</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+S">Shange Tang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yangsibo Huang</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+C">Chi Jin</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xinyun Chen</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+C">Chiyuan Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+M">Mengdi Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06453v2-abstract-short" style="display: inline;"> Large language models have demonstrated impressive performance on challenging mathematical reasoning tasks, which has triggered the discussion of whether the performance is achieved by true reasoning capability or memorization. 
To investigate this question, prior work has constructed mathematical benchmarks when questions undergo simple perturbations -- modifications that still preserve the underl… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06453v2-abstract-full').style.display = 'inline'; document.getElementById('2502.06453v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06453v2-abstract-full" style="display: none;"> Large language models have demonstrated impressive performance on challenging mathematical reasoning tasks, which has triggered the discussion of whether the performance is achieved by true reasoning capability or memorization. To investigate this question, prior work has constructed mathematical benchmarks when questions undergo simple perturbations -- modifications that still preserve the underlying reasoning patterns of the solutions. However, no work has explored hard perturbations, which fundamentally change the nature of the problem so that the original solution steps do not apply. To bridge the gap, we construct MATH-P-Simple and MATH-P-Hard via simple perturbation and hard perturbation, respectively. Each consists of 279 perturbed math problems derived from level-5 (hardest) problems in the MATH dataset (Hendrycks et al., 2021). We observe significant performance drops on MATH-P-Hard across various models, including o1-mini (-16.49%) and gemini-2.0-flash-thinking (-12.9%). We also raise concerns about a novel form of memorization where models blindly apply learned problem-solving skills without assessing their applicability to modified contexts. This issue is amplified when using original problems for in-context learning. We call for research efforts to address this challenge, which is critical for developing more robust and reliable reasoning models. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06453v2-abstract-full').style.display = 'none'; document.getElementById('2502.06453v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">v2: fix bugs in Fig. 1</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06059">arXiv:2502.06059</a> <span> [<a href="https://arxiv.org/pdf/2502.06059">pdf</a>, <a href="https://arxiv.org/format/2502.06059">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Position: We Need An Adaptive Interpretation of Helpful, Honest, and Harmless Principles </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yue Huang</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+C">Chujie Gao</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Y">Yujun Zhou</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+K">Kehan Guo</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiangqi Wang</a>, <a href="/search/cs?searchtype=author&query=Cohen-Sasson%2C+O">Or Cohen-Sasson</a>, <a href="/search/cs?searchtype=author&query=Lamparth%2C+M">Max Lamparth</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xiangliang Zhang</a> </p> <p class="abstract 
mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06059v1-abstract-short" style="display: inline;"> The Helpful, Honest, and Harmless (HHH) principle is a foundational framework for aligning AI systems with human values. However, existing interpretations of the HHH principle often overlook contextual variability and conflicting requirements across applications. In this paper, we argue for an adaptive interpretation of the HHH principle and propose a reference framework for its adaptation to dive… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06059v1-abstract-full').style.display = 'inline'; document.getElementById('2502.06059v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06059v1-abstract-full" style="display: none;"> The Helpful, Honest, and Harmless (HHH) principle is a foundational framework for aligning AI systems with human values. However, existing interpretations of the HHH principle often overlook contextual variability and conflicting requirements across applications. In this paper, we argue for an adaptive interpretation of the HHH principle and propose a reference framework for its adaptation to diverse scenarios. We first examine the principle's foundational significance and identify ambiguities and conflicts through case studies of its dimensions. To address these challenges, we introduce the concept of priority order, which provides a structured approach for balancing trade-offs among helpfulness, honesty, and harmlessness. Further, we explore the interrelationships between these dimensions, demonstrating how harmlessness and helpfulness can be jointly enhanced and analyzing their interdependencies in high-risk evaluations. 
Building on these insights, we propose a reference framework that integrates context definition, value prioritization, risk assessment, and benchmarking standards to guide the adaptive application of the HHH principle. This work offers practical insights for improving AI alignment, ensuring that HHH principles remain both ethically grounded and operationally effective in real-world AI deployment. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06059v1-abstract-full').style.display = 'none'; document.getElementById('2502.06059v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05943">arXiv:2502.05943</a> <span> [<a href="https://arxiv.org/pdf/2502.05943">pdf</a>, <a href="https://arxiv.org/format/2502.05943">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Sustainable Adaptation for Autonomous Driving with the Mixture of Progressive Experts Network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cui%2C+Y">Yixin Cui</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+S">Shuo Yang</a>, <a href="/search/cs?searchtype=author&query=Wan%2C+C">Chi Wan</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xincheng Li</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+J">Jiaming Xing</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yuanjian Zhang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yanjun Huang</a>, <a 
href="/search/cs?searchtype=author&query=Chen%2C+H">Hong Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05943v1-abstract-short" style="display: inline;"> Learning-based autonomous driving methods require continuous acquisition of domain knowledge to adapt to diverse driving scenarios. However, due to the inherent challenges of long-tailed data distribution, current approaches still face limitations in complex and dynamic driving environments, particularly when encountering new scenarios and data. This underscores the necessity for enhanced continua… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05943v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05943v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05943v1-abstract-full" style="display: none;"> Learning-based autonomous driving methods require continuous acquisition of domain knowledge to adapt to diverse driving scenarios. However, due to the inherent challenges of long-tailed data distribution, current approaches still face limitations in complex and dynamic driving environments, particularly when encountering new scenarios and data. This underscores the necessity for enhanced continual learning capabilities to improve system adaptability. To address these challenges, the paper introduces a dynamic progressive optimization framework that facilitates adaptation to variations in dynamic environments, achieved by integrating reinforcement learning and supervised learning for data aggregation. Building on this framework, we propose the Mixture of Progressive Experts (MoPE) network. 
The proposed method selectively activates multiple expert models based on the distinct characteristics of each task and progressively refines the network architecture to facilitate adaptation to new tasks. Simulation results show that the MoPE model outperforms behavior cloning methods, achieving up to a 7.3% performance improvement in intricate urban road environments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05943v1-abstract-full').style.display = 'none'; document.getElementById('2502.05943v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05371">arXiv:2502.05371</a> <span> [<a href="https://arxiv.org/pdf/2502.05371">pdf</a>, <a href="https://arxiv.org/ps/2502.05371">ps</a>, <a href="https://arxiv.org/format/2502.05371">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Mathematical Physics">math-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> </div> </div> <p class="title is-5 mathjax"> Cumulant Structures of Entanglement Entropy </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Youyi Huang</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+L">Lu 
Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05371v1-abstract-short" style="display: inline;"> We present a new method to derive exact cumulant expressions of any order of von Neumann entropy over Hilbert-Schmidt ensemble. The new method uncovers hidden cumulant structures that decouple each cumulant in a summation-free manner into its lower-order joint cumulants involving families of ancillary statistics. Importantly, the new method is able to avoid the seemingly inevitable task of simplif… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05371v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05371v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05371v1-abstract-full" style="display: none;"> We present a new method to derive exact cumulant expressions of any order of von Neumann entropy over Hilbert-Schmidt ensemble. The new method uncovers hidden cumulant structures that decouple each cumulant in a summation-free manner into its lower-order joint cumulants involving families of ancillary statistics. Importantly, the new method is able to avoid the seemingly inevitable task of simplifying nested summations of increasing difficulty that prevents the existing method in the literature to obtain higher-order cumulants. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05371v1-abstract-full').style.display = 'none'; document.getElementById('2502.05371v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05176">arXiv:2502.05176</a> <span> [<a href="https://arxiv.org/pdf/2502.05176">pdf</a>, <a href="https://arxiv.org/format/2502.05176">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> AuraFusion360: Augmented Unseen Region Alignment for Reference-based 360° Unbounded Scene Inpainting </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wu%2C+C">Chung-Ho Wu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yang-Jung Chen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Ying-Huan Chen</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+J">Jie-Ying Lee</a>, <a href="/search/cs?searchtype=author&query=Ke%2C+B">Bo-Hsu Ke</a>, <a href="/search/cs?searchtype=author&query=Mu%2C+C+T">Chun-Wei Tuan Mu</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yi-Chuan Huang</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+C">Chin-Yang Lin</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+M">Min-Hung Chen</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+Y">Yen-Yu Lin</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yu-Lun Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05176v1-abstract-short" style="display: inline;"> Three-dimensional scene inpainting is crucial for applications from virtual reality to architectural visualization, yet existing methods struggle with view consistency and geometric accuracy in 360° unbounded scenes. 
We present AuraFusion360, a novel reference-based method that enables high-quality object removal and hole filling in 3D scenes represented by Gaussian Splatting. Our approach introdu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05176v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05176v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05176v1-abstract-full" style="display: none;"> Three-dimensional scene inpainting is crucial for applications from virtual reality to architectural visualization, yet existing methods struggle with view consistency and geometric accuracy in 360° unbounded scenes. We present AuraFusion360, a novel reference-based method that enables high-quality object removal and hole filling in 3D scenes represented by Gaussian Splatting. Our approach introduces (1) depth-aware unseen mask generation for accurate occlusion identification, (2) Adaptive Guided Depth Diffusion, a zero-shot method for accurate initial point placement without requiring additional training, and (3) SDEdit-based detail enhancement for multi-view coherence. We also introduce 360-USID, the first comprehensive dataset for 360° unbounded scene inpainting with ground truth. Extensive experiments demonstrate that AuraFusion360 significantly outperforms existing methods, achieving superior perceptual quality while maintaining geometric accuracy across dramatic viewpoint changes. See our project page for video results and the dataset at https://kkennethwu.github.io/aurafusion360/. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05176v1-abstract-full').style.display = 'none'; document.getElementById('2502.05176v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project page: https://kkennethwu.github.io/aurafusion360/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04638">arXiv:2502.04638</a> <span> [<a href="https://arxiv.org/pdf/2502.04638">pdf</a>, <a href="https://arxiv.org/format/2502.04638">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Learning Street View Representations with Spatiotemporal Contrast </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yong Li</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yingjing Huang</a>, <a href="/search/cs?searchtype=author&query=Mai%2C+G">Gengchen Mai</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+F">Fan Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04638v1-abstract-short" style="display: inline;"> Street view imagery is extensively utilized in representation learning 
for urban visual environments, supporting various sustainable development tasks such as environmental perception and socio-economic assessment. However, it is challenging for existing image representations to specifically encode the dynamic urban environment (such as pedestrians, vehicles, and vegetation), the built environment… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04638v1-abstract-full').style.display = 'inline'; document.getElementById('2502.04638v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04638v1-abstract-full" style="display: none;"> Street view imagery is extensively utilized in representation learning for urban visual environments, supporting various sustainable development tasks such as environmental perception and socio-economic assessment. However, it is challenging for existing image representations to specifically encode the dynamic urban environment (such as pedestrians, vehicles, and vegetation), the built environment (including buildings, roads, and urban infrastructure), and the environmental ambiance (such as the cultural and socioeconomic atmosphere) depicted in street view imagery to address downstream tasks related to the city. In this work, we propose an innovative self-supervised learning framework that leverages temporal and spatial attributes of street view imagery to learn image representations of the dynamic urban environment for diverse downstream tasks. By employing street view images captured at the same location over time and spatially nearby views at the same time, we construct contrastive learning tasks designed to learn the temporal-invariant characteristics of the built environment and the spatial-invariant neighborhood ambiance. 
Our approach significantly outperforms traditional supervised and unsupervised methods in tasks such as visual place recognition, socioeconomic estimation, and human-environment perception. Moreover, we demonstrate the varying behaviors of image representations learned through different contrastive learning objectives across various downstream tasks. This study systematically discusses representation learning strategies for urban studies based on street view images, providing a benchmark that enhances the applicability of visual data in urban science. The code is available at https://github.com/yonglleee/UrbanSTCL. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04638v1-abstract-full').style.display = 'none'; document.getElementById('2502.04638v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04563">arXiv:2502.04563</a> <span> [<a href="https://arxiv.org/pdf/2502.04563">pdf</a>, <a href="https://arxiv.org/format/2502.04563">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Emerging Technologies">cs.ET</span> </div> </div> <p class="title is-5 mathjax"> WaferLLM: A Wafer-Scale LLM Inference System </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=He%2C+C">Congjie He</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yeqi Huang</a>, <a href="/search/cs?searchtype=author&query=Mu%2C+P">Pei Mu</a>, <a href="/search/cs?searchtype=author&query=Miao%2C+Z">Ziming Miao</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+J">Jilong Xue</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+L">Lingxiao Ma</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+F">Fan Yang</a>, <a href="/search/cs?searchtype=author&query=Mai%2C+L">Luo Mai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04563v1-abstract-short" style="display: inline;"> Emerging AI accelerators increasingly adopt wafer-scale manufacturing technologies, integrating hundreds of thousands of AI cores in a mesh-based architecture with large 
distributed on-chip memory (tens of GB in total) and ultra-high on-chip memory bandwidth (tens of PB/s). However, current LLM inference systems, optimized for shared memory architectures like GPUs, fail to fully exploit these acce… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04563v1-abstract-full').style.display = 'inline'; document.getElementById('2502.04563v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04563v1-abstract-full" style="display: none;"> Emerging AI accelerators increasingly adopt wafer-scale manufacturing technologies, integrating hundreds of thousands of AI cores in a mesh-based architecture with large distributed on-chip memory (tens of GB in total) and ultra-high on-chip memory bandwidth (tens of PB/s). However, current LLM inference systems, optimized for shared memory architectures like GPUs, fail to fully exploit these accelerators. We introduce WaferLLM, the first wafer-scale LLM inference system. WaferLLM is guided by a novel PLMR device model that captures the unique hardware characteristics of wafer-scale architectures. Leveraging this model, WaferLLM pioneers wafer-scale LLM parallelism, optimizing the utilization of hundreds of thousands of on-chip cores. It also introduces MeshGEMM and MeshGEMV, the first GEMM and GEMV implementations designed to scale effectively on wafer-scale accelerators. Evaluations show that WaferLLM achieves 200$\times$ better wafer-scale accelerator utilization than state-of-the-art systems. On a commodity wafer-scale accelerator, WaferLLM delivers 606$\times$ faster and 22$\times$ more energy-efficient GEMV compared to an advanced GPU. For LLMs, WaferLLM enables 39$\times$ faster decoding with 1.7$\times$ better energy efficiency. We anticipate these numbers will grow significantly as wafer-scale AI models, software, and hardware continue to mature. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04563v1-abstract-full').style.display = 'none'; document.getElementById('2502.04563v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04397">arXiv:2502.04397</a> <span> [<a href="https://arxiv.org/pdf/2502.04397">pdf</a>, <a href="https://arxiv.org/format/2502.04397">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Multimodal Medical Code Tokenizer </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Su%2C+X">Xiaorui Su</a>, <a href="/search/cs?searchtype=author&query=Messica%2C+S">Shvat Messica</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yepeng Huang</a>, <a href="/search/cs?searchtype=author&query=Johnson%2C+R">Ruth Johnson</a>, <a href="/search/cs?searchtype=author&query=Fesser%2C+L">Lukas Fesser</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+S">Shanghua Gao</a>, <a href="/search/cs?searchtype=author&query=Sahneh%2C+F">Faryad Sahneh</a>, <a href="/search/cs?searchtype=author&query=Zitnik%2C+M">Marinka Zitnik</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short 
has-text-grey-dark mathjax" id="2502.04397v2-abstract-short" style="display: inline;"> Foundation models trained on patient electronic health records (EHRs) require tokenizing medical data into sequences of discrete vocabulary items. Existing tokenizers treat medical codes from EHRs as isolated textual tokens. However, each medical code is defined by its textual description, its position in ontological hierarchies, and its relationships to other codes, such as disease co-occurrences… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04397v2-abstract-full').style.display = 'inline'; document.getElementById('2502.04397v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04397v2-abstract-full" style="display: none;"> Foundation models trained on patient electronic health records (EHRs) require tokenizing medical data into sequences of discrete vocabulary items. Existing tokenizers treat medical codes from EHRs as isolated textual tokens. However, each medical code is defined by its textual description, its position in ontological hierarchies, and its relationships to other codes, such as disease co-occurrences and drug-treatment associations. Medical vocabularies contain more than 600,000 codes with critical information for clinical reasoning. We introduce MedTok, a multimodal medical code tokenizer that uses the text descriptions and relational context of codes. MedTok processes text using a language model encoder and encodes the relational structure with a graph encoder. It then quantizes both modalities into a unified token space, preserving modality-specific and cross-modality information. We integrate MedTok into five EHR models and evaluate it on operational and clinical tasks across in-patient and out-patient datasets, including outcome prediction, diagnosis classification, drug recommendation, and risk stratification. 
Swapping standard EHR tokenizers with MedTok improves AUPRC across all EHR models, by 4.10% on MIMIC-III, 4.78% on MIMIC-IV, and 11.30% on EHRShot, with the largest gains in drug recommendation. Beyond EHR modeling, we demonstrate using MedTok tokenizer with medical QA systems. Our results demonstrate the potential of MedTok as a unified tokenizer for medical codes, improving tokenization for medical foundation models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04397v2-abstract-full').style.display = 'none'; document.getElementById('2502.04397v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">conference</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04308">arXiv:2502.04308</a> <span> [<a href="https://arxiv.org/pdf/2502.04308">pdf</a>, <a href="https://arxiv.org/format/2502.04308">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Physics and Society">physics.soc-ph</span> </div> </div> <p class="title is-5 mathjax"> HOG-Diff: Higher-Order 
Guided Diffusion for Graph Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yiming Huang</a>, <a href="/search/cs?searchtype=author&query=Birdal%2C+T">Tolga Birdal</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04308v1-abstract-short" style="display: inline;"> Graph generation is a critical yet challenging task as empirical analyses require a deep understanding of complex, non-Euclidean structures. Although diffusion models have recently made significant achievements in graph generation, these models typically adapt from the frameworks designed for image generation, making them ill-suited for capturing the topological properties of graphs. In this work,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04308v1-abstract-full').style.display = 'inline'; document.getElementById('2502.04308v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04308v1-abstract-full" style="display: none;"> Graph generation is a critical yet challenging task as empirical analyses require a deep understanding of complex, non-Euclidean structures. Although diffusion models have recently made significant achievements in graph generation, these models typically adapt from the frameworks designed for image generation, making them ill-suited for capturing the topological properties of graphs. In this work, we propose a novel Higher-order Guided Diffusion (HOG-Diff) model that follows a coarse-to-fine generation curriculum and is guided by higher-order information, enabling the progressive generation of plausible graphs with inherent topological structures. We further prove that our model exhibits a stronger theoretical guarantee than classical diffusion frameworks. 
Extensive experiments on both molecular and generic graph generation tasks demonstrate that our method consistently outperforms or remains competitive with state-of-the-art baselines. Our code is available at https://github.com/Yiminghh/HOG-Diff. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04308v1-abstract-full').style.display = 'none'; document.getElementById('2502.04308v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03999">arXiv:2502.03999</a> <span> [<a href="https://arxiv.org/pdf/2502.03999">pdf</a>, <a href="https://arxiv.org/format/2502.03999">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> A Self-supervised Multimodal Deep Learning Approach to Differentiate Post-radiotherapy Progression from Pseudoprogression in Glioblastoma </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gomaa%2C+A">Ahmed Gomaa</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yixing Huang</a>, <a href="/search/cs?searchtype=author&query=Stephan%2C+P">Pluvio Stephan</a>, <a href="/search/cs?searchtype=author&query=Breininger%2C+K">Katharina Breininger</a>, <a href="/search/cs?searchtype=author&query=Frey%2C+B">Benjamin Frey</a>, <a href="/search/cs?searchtype=author&query=D%C3%B6rfler%2C+A">Arnd Dörfler</a>, <a 
href="/search/cs?searchtype=author&query=Schnell%2C+O">Oliver Schnell</a>, <a href="/search/cs?searchtype=author&query=Delev%2C+D">Daniel Delev</a>, <a href="/search/cs?searchtype=author&query=Coras%2C+R">Roland Coras</a>, <a href="/search/cs?searchtype=author&query=Schmitter%2C+C">Charlotte Schmitter</a>, <a href="/search/cs?searchtype=author&query=Stritzelberger%2C+J">Jenny Stritzelberger</a>, <a href="/search/cs?searchtype=author&query=Semrau%2C+S">Sabine Semrau</a>, <a href="/search/cs?searchtype=author&query=Maier%2C+A">Andreas Maier</a>, <a href="/search/cs?searchtype=author&query=Bayer%2C+S">Siming Bayer</a>, <a href="/search/cs?searchtype=author&query=Sch%C3%B6necker%2C+S">Stephan Schönecker</a>, <a href="/search/cs?searchtype=author&query=Heiland%2C+D+H">Dieter H Heiland</a>, <a href="/search/cs?searchtype=author&query=Hau%2C+P">Peter Hau</a>, <a href="/search/cs?searchtype=author&query=Gaipl%2C+U+S">Udo S. Gaipl</a>, <a href="/search/cs?searchtype=author&query=Bert%2C+C">Christoph Bert</a>, <a href="/search/cs?searchtype=author&query=Fietkau%2C+R">Rainer Fietkau</a>, <a href="/search/cs?searchtype=author&query=Schmidt%2C+M+A">Manuel A. Schmidt</a>, <a href="/search/cs?searchtype=author&query=Putz%2C+F">Florian Putz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.03999v1-abstract-short" style="display: inline;"> Accurate differentiation of pseudoprogression (PsP) from True Progression (TP) following radiotherapy (RT) in glioblastoma (GBM) patients is crucial for optimal treatment planning. However, this task remains challenging due to the overlapping imaging characteristics of PsP and TP. 
This study therefore proposes a multimodal deep-learning approach utilizing complementary information from routine ana… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03999v1-abstract-full').style.display = 'inline'; document.getElementById('2502.03999v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03999v1-abstract-full" style="display: none;"> Accurate differentiation of pseudoprogression (PsP) from True Progression (TP) following radiotherapy (RT) in glioblastoma (GBM) patients is crucial for optimal treatment planning. However, this task remains challenging due to the overlapping imaging characteristics of PsP and TP. This study therefore proposes a multimodal deep-learning approach utilizing complementary information from routine anatomical MR images, clinical parameters, and RT treatment planning information for improved predictive accuracy. The approach utilizes a self-supervised Vision Transformer (ViT) to encode multi-sequence MR brain volumes to effectively capture both global and local context from the high dimensional input. The encoder is trained in a self-supervised upstream task on unlabeled glioma MRI datasets from the open BraTS2021, UPenn-GBM, and UCSF-PDGM datasets to generate compact, clinically relevant representations from FLAIR and T1 post-contrast sequences. These encoded MR inputs are then integrated with clinical data and RT treatment planning information through guided cross-modal attention, improving progression classification accuracy. This work was developed using two datasets from different centers: the Burdenko Glioblastoma Progression Dataset (n = 59) for training and validation, and the GlioCMV progression dataset from the University Hospital Erlangen (UKER) (n = 20) for testing. The proposed method achieved an AUC of 75.3%, outperforming the current state-of-the-art data-driven approaches. 
Importantly, the proposed approach relies on readily available anatomical MRI sequences, clinical data, and RT treatment planning information, enhancing its clinical feasibility. The proposed approach addresses the challenge of limited data availability for PsP and TP differentiation and could allow for improved clinical decision-making and optimized treatment plans for GBM patients. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03999v1-abstract-full').style.display = 'none'; document.getElementById('2502.03999v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03766">arXiv:2502.03766</a> <span> [<a href="https://arxiv.org/pdf/2502.03766">pdf</a>, <a href="https://arxiv.org/ps/2502.03766">ps</a>, <a href="https://arxiv.org/format/2502.03766">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Hierarchical Contextual Manifold Alignment for Structuring Latent Representations in Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dong%2C+M">Meiquan Dong</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+H">Haoran Liu</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yan Huang</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+Z">Zixuan Feng</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+J">Jianhong Tang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+R">Ruoxi Wang</a> </p> <p 
class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.03766v1-abstract-short" style="display: inline;"> The organization of latent token representations plays a crucial role in determining the stability, generalization, and contextual consistency of language models, yet conventional approaches to embedding refinement often rely on parameter modifications that introduce additional computational overhead. A hierarchical alignment method was introduced to restructure token embeddings without altering c… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03766v1-abstract-full').style.display = 'inline'; document.getElementById('2502.03766v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03766v1-abstract-full" style="display: none;"> The organization of latent token representations plays a crucial role in determining the stability, generalization, and contextual consistency of language models, yet conventional approaches to embedding refinement often rely on parameter modifications that introduce additional computational overhead. A hierarchical alignment method was introduced to restructure token embeddings without altering core model weights, ensuring that representational distributions maintained coherence across different linguistic contexts. Experimental evaluations demonstrated improvements in rare token retrieval, adversarial robustness, and long-range dependency tracking, highlighting the advantages of hierarchical structuring in mitigating inconsistencies in latent space organization. The comparative analysis against conventional fine-tuning and embedding perturbation methods revealed that hierarchical restructuring maintained computational efficiency while achieving measurable gains in representation quality. 
Structural refinements introduced through the alignment process resulted in improved contextual stability across varied linguistic tasks, reducing inconsistencies in token proximity relationships and enhancing interpretability in language generation. A detailed computational assessment confirmed that the realignment process introduced minimal inference overhead, ensuring that representational improvements did not compromise model efficiency. The findings reinforced the broader significance of structured representation learning, illustrating that hierarchical embedding modifications could serve as an effective strategy for refining latent space distributions while preserving pre-learned semantic associations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03766v1-abstract-full').style.display = 'none'; document.getElementById('2502.03766v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03072">arXiv:2502.03072</a> <span> [<a href="https://arxiv.org/pdf/2502.03072">pdf</a>, <a href="https://arxiv.org/format/2502.03072">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> RoboGrasp: A Universal Grasping Policy for Robust Robotic Control </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yiqi Huang</a>, <a href="/search/cs?searchtype=author&query=Davies%2C+T">Travis Davies</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+J">Jiahuan Yan</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xiang Chen</a>, <a href="/search/cs?searchtype=author&query=Tian%2C+Y">Yu Tian</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+L">Luhui Hu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.03072v1-abstract-short" style="display: inline;"> Imitation learning and world models have shown significant promise in advancing generalizable robotic learning, with robotic grasping remaining a critical challenge for achieving precise manipulation. Existing methods often rely heavily on robot arm state data and RGB images, leading to overfitting to specific object shapes or positions. 
To address these limitations, we propose RoboGrasp, a univer… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03072v1-abstract-full').style.display = 'inline'; document.getElementById('2502.03072v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03072v1-abstract-full" style="display: none;"> Imitation learning and world models have shown significant promise in advancing generalizable robotic learning, with robotic grasping remaining a critical challenge for achieving precise manipulation. Existing methods often rely heavily on robot arm state data and RGB images, leading to overfitting to specific object shapes or positions. To address these limitations, we propose RoboGrasp, a universal grasping policy framework that integrates pretrained grasp detection models with robotic learning. By leveraging robust visual guidance from object detection and segmentation tasks, RoboGrasp significantly enhances grasp precision, stability, and generalizability, achieving up to 34% higher success rates in few-shot learning and grasping box prompt tasks. Built on diffusion-based methods, RoboGrasp is adaptable to various robotic learning paradigms, enabling precise and reliable manipulation across diverse and complex scenarios. This framework represents a scalable and versatile solution for tackling real-world challenges in robotic grasping. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03072v1-abstract-full').style.display = 'none'; document.getElementById('2502.03072v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03041">arXiv:2502.03041</a> <span> [<a href="https://arxiv.org/pdf/2502.03041">pdf</a>, <a href="https://arxiv.org/format/2502.03041">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Large Language Models Are Universal Recommendation Learners </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jiang%2C+J">Junguang Jiang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yanwen Huang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+B">Bin Liu</a>, <a href="/search/cs?searchtype=author&query=Kong%2C+X">Xiaoyu Kong</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Z">Ziru Xu</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+H">Han Zhu</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+J">Jian Xu</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+B">Bo Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.03041v1-abstract-short" style="display: inline;"> In real-world recommender systems, different tasks are typically addressed using supervised learning on task-specific datasets with carefully designed model architectures. 
We demonstrate that large language models (LLMs) can function as universal recommendation learners, capable of handling multiple tasks within a unified input-output framework, eliminating the need for specialized model designs.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03041v1-abstract-full').style.display = 'inline'; document.getElementById('2502.03041v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03041v1-abstract-full" style="display: none;"> In real-world recommender systems, different tasks are typically addressed using supervised learning on task-specific datasets with carefully designed model architectures. We demonstrate that large language models (LLMs) can function as universal recommendation learners, capable of handling multiple tasks within a unified input-output framework, eliminating the need for specialized model designs. To improve the recommendation performance of LLMs, we introduce a multimodal fusion module for item representation and a sequence-in-set-out approach for efficient candidate generation. When applied to industrial-scale data, our LLM achieves competitive results with expert models elaborately designed for different recommendation tasks. Furthermore, our analysis reveals that recommendation outcomes are highly sensitive to text input, highlighting the potential of prompt engineering in optimizing industrial-scale recommender systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03041v1-abstract-full').style.display = 'none'; document.getElementById('2502.03041v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02630">arXiv:2502.02630</a> <span> [<a href="https://arxiv.org/pdf/2502.02630">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> scBIT: Integrating Single-cell Transcriptomic Data into fMRI-based Prediction for Alzheimer's Disease Diagnosis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yu-An Huang</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+Y">Yao Hu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yue-Chao Li</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+X">Xiyue Cao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xinyuan Li</a>, <a href="/search/cs?searchtype=author&query=Tan%2C+K+C">Kay Chen Tan</a>, <a href="/search/cs?searchtype=author&query=You%2C+Z">Zhu-Hong You</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Z">Zhi-An Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.02630v1-abstract-short" style="display: inline;"> Functional MRI (fMRI) and single-cell transcriptomics are pivotal in Alzheimer's disease (AD) research, each providing unique insights into neural function and molecular mechanisms. However, integrating these complementary modalities remains largely unexplored. Here, we introduce scBIT, a novel method for enhancing AD prediction by combining fMRI with single-nucleus RNA (snRNA). 
scBIT leverages sn… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02630v1-abstract-full').style.display = 'inline'; document.getElementById('2502.02630v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.02630v1-abstract-full" style="display: none;"> Functional MRI (fMRI) and single-cell transcriptomics are pivotal in Alzheimer's disease (AD) research, each providing unique insights into neural function and molecular mechanisms. However, integrating these complementary modalities remains largely unexplored. Here, we introduce scBIT, a novel method for enhancing AD prediction by combining fMRI with single-nucleus RNA (snRNA). scBIT leverages snRNA as an auxiliary modality, significantly improving fMRI-based prediction models and providing comprehensive interpretability. It employs a sampling strategy to segment snRNA data into cell-type-specific gene networks and utilizes a self-explainable graph neural network to extract critical subgraphs. Additionally, we use demographic and genetic similarities to pair snRNA and fMRI data across individuals, enabling robust cross-modal learning. Extensive experiments validate scBIT's effectiveness in revealing intricate brain region-gene associations and enhancing diagnostic prediction accuracy. By advancing brain imaging transcriptomics to the single-cell level, scBIT sheds new light on biomarker discovery in AD research. Experimental results show that incorporating snRNA data into the scBIT model significantly boosts accuracy, improving binary classification by 3.39% and five-class classification by 26.59%. The codes were implemented in Python and have been released on GitHub (https://github.com/77YQ77/scBIT) and Zenodo (https://zenodo.org/records/11599030) with detailed instructions. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02630v1-abstract-full').style.display = 'none'; document.getElementById('2502.02630v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">31 pages, 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02629">arXiv:2502.02629</a> <span> [<a href="https://arxiv.org/pdf/2502.02629">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Graph Structure Learning for Tumor Microenvironment with Cell Type Annotation from non-spatial scRNA-seq data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yu-An Huang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yue-Chao Li</a>, <a href="/search/cs?searchtype=author&query=You%2C+H">Hai-Ru You</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+J">Jie Pan</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+X">Xiyue Cao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xinyuan Li</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Z">Zhi-An Huang</a>, <a href="/search/cs?searchtype=author&query=You%2C+Z">Zhu-Hong 
You</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.02629v1-abstract-short" style="display: inline;"> The exploration of cellular heterogeneity within the tumor microenvironment (TME) via single-cell RNA sequencing (scRNA-seq) is essential for understanding cancer progression and response to therapy. Current scRNA-seq approaches, however, lack spatial context and rely on incomplete datasets of ligand-receptor interactions (LRIs), limiting accurate cell type annotation and cell-cell communication (… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02629v1-abstract-full').style.display = 'inline'; document.getElementById('2502.02629v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.02629v1-abstract-full" style="display: none;"> The exploration of cellular heterogeneity within the tumor microenvironment (TME) via single-cell RNA sequencing (scRNA-seq) is essential for understanding cancer progression and response to therapy. Current scRNA-seq approaches, however, lack spatial context and rely on incomplete datasets of ligand-receptor interactions (LRIs), limiting accurate cell type annotation and cell-cell communication (CCC) inference. This study addresses these challenges using a novel graph neural network (GNN) model that enhances cell type prediction and cell interaction analysis. Our study utilized a dataset consisting of 49,020 cells from 19 patients across three cancer types: Leukemia, Breast Invasive Carcinoma, and Colorectal Cancer. The proposed scGSL model demonstrated robust performance, achieving an average accuracy of 84.83%, precision of 86.23%, recall of 81.51%, and an F1 score of 80.92% across all datasets. 
These metrics represent a significant enhancement over existing methods, which typically exhibit lower performance metrics. Additionally, by reviewing existing literature on gene interactions within the TME, the scGSL model proves to robustly identify biologically meaningful gene interactions in an unsupervised manner, validated by significant expression differences in key gene pairs across various cancers. The source code and data used in this paper can be found in https://github.com/LiYuechao1998/scGSL. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02629v1-abstract-full').style.display = 'none'; document.getElementById('2502.02629v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">29 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02384">arXiv:2502.02384</a> <span> [<a href="https://arxiv.org/pdf/2502.02384">pdf</a>, <a href="https://arxiv.org/format/2502.02384">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> STAIR: Improving Safety Alignment with Introspective Reasoning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yichi Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Siyuan Zhang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yao Huang</a>, <a 
href="/search/cs?searchtype=author&query=Xia%2C+Z">Zeyu Xia</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+Z">Zhengwei Fang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+X">Xiao Yang</a>, <a href="/search/cs?searchtype=author&query=Duan%2C+R">Ranjie Duan</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+D">Dong Yan</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+Y">Yinpeng Dong</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+J">Jun Zhu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.02384v1-abstract-short" style="display: inline;"> Ensuring the safety and harmlessness of Large Language Models (LLMs) has become equally critical as their performance in applications. However, existing safety alignment methods typically suffer from safety-performance trade-offs and the susceptibility to jailbreak attacks, primarily due to their reliance on direct refusals for malicious queries. In this paper, we propose STAIR, a novel framework… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02384v1-abstract-full').style.display = 'inline'; document.getElementById('2502.02384v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.02384v1-abstract-full" style="display: none;"> Ensuring the safety and harmlessness of Large Language Models (LLMs) has become equally critical as their performance in applications. However, existing safety alignment methods typically suffer from safety-performance trade-offs and the susceptibility to jailbreak attacks, primarily due to their reliance on direct refusals for malicious queries. In this paper, we propose STAIR, a novel framework that integrates SafeTy Alignment with Introspective Reasoning. 
We enable LLMs to identify safety risks through step-by-step analysis by self-improving chain-of-thought (CoT) reasoning with safety awareness. STAIR first equips the model with a structured reasoning capability and then advances safety alignment via iterative preference optimization on step-level reasoning data generated using our newly proposed Safety-Informed Monte Carlo Tree Search (SI-MCTS). We further train a process reward model on this data to guide test-time searches for improved responses. Extensive experiments show that STAIR effectively mitigates harmful outputs while better preserving helpfulness, compared to instinctive alignment strategies. With test-time scaling, STAIR achieves a safety performance comparable to Claude-3.5 against popular jailbreak attacks. Relevant resources in this work are available at https://github.com/thu-ml/STAIR. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02384v1-abstract-full').style.display = 'none'; document.getElementById('2502.02384v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">22 pages, 8 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02195">arXiv:2502.02195</a> <span> [<a href="https://arxiv.org/pdf/2502.02195">pdf</a>, <a href="https://arxiv.org/format/2502.02195">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Geophysics">physics.geo-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> EFKAN: A KAN-Integrated Neural Operator For Efficient Magnetotelluric Forward Modeling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+F">Feng Wang</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+H">Hong Qiu</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yingying Huang</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+X">Xiaozhe Gu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+R">Renfang Wang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+B">Bo Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.02195v1-abstract-short" style="display: inline;"> Magnetotelluric (MT) forward modeling is fundamental for improving the accuracy and efficiency of MT inversion. Neural operators (NOs) have been effectively used for rapid MT forward modeling, demonstrating their promising performance in solving the MT forward modeling-related partial differential equations (PDEs). 
Particularly, they can obtain the electromagnetic field at arbitrary locations and… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02195v1-abstract-full').style.display = 'inline'; document.getElementById('2502.02195v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.02195v1-abstract-full" style="display: none;"> Magnetotelluric (MT) forward modeling is fundamental for improving the accuracy and efficiency of MT inversion. Neural operators (NOs) have been effectively used for rapid MT forward modeling, demonstrating their promising performance in solving the MT forward modeling-related partial differential equations (PDEs). Particularly, they can obtain the electromagnetic field at arbitrary locations and frequencies. In these NOs, the projection layers have been dominated by multi-layer perceptrons (MLPs), which may potentially reduce the accuracy of the solution because they usually suffer from the disadvantages of MLPs, such as lack of interpretability, overfitting, and so on. Therefore, to improve the accuracy of MT forward modeling with NOs and explore the potential alternatives to MLPs, we propose a novel neural operator by extending the Fourier neural operator (FNO) with Kolmogorov-Arnold network (EFKAN). Within the EFKAN framework, the FNO serves as the branch network to calculate the apparent resistivity and phase from the resistivity model in the frequency domain. Meanwhile, the KAN acts as the trunk network to project the resistivity and phase, determined by the FNO, to the desired locations and frequencies. Experimental results demonstrate that the proposed method not only achieves higher accuracy in obtaining apparent resistivity and phase compared to the NO equipped with MLPs at the desired frequencies and locations but also outperforms traditional numerical methods in terms of computational speed. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02195v1-abstract-full').style.display = 'none'; document.getElementById('2502.02195v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to Computers & Geosciences</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.01827">arXiv:2502.01827</a> <span> [<a href="https://arxiv.org/pdf/2502.01827">pdf</a>, <a href="https://arxiv.org/format/2502.01827">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Relatively-Secure LLM-Based Steganography via Constrained Markov Decision Processes </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yu-Shin Huang</a>, <a href="/search/cs?searchtype=author&query=Tian%2C+C">Chao Tian</a>, <a href="/search/cs?searchtype=author&query=Narayanan%2C+K">Krishna Narayanan</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+L">Lizhong Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.01827v1-abstract-short" style="display: inline;"> Linguistic steganography aims to conceal information within natural language text without being detected. 
An effective steganography approach should encode the secret message into a minimal number of language tokens while preserving the natural appearance and fluidity of the stego-texts. We present a new framework to enhance the embedding efficiency of stego-texts generated by modifying the output… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01827v1-abstract-full').style.display = 'inline'; document.getElementById('2502.01827v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.01827v1-abstract-full" style="display: none;"> Linguistic steganography aims to conceal information within natural language text without being detected. An effective steganography approach should encode the secret message into a minimal number of language tokens while preserving the natural appearance and fluidity of the stego-texts. We present a new framework to enhance the embedding efficiency of stego-texts generated by modifying the output of a large language model (LLM). The novelty of our approach is in abstracting the sequential steganographic embedding process as a Constrained Markov Decision Process (CMDP), which takes into consideration the long-term dependencies instead of merely the immediate effects. We constrain the solution space such that the discounted accumulative total variation divergence between the selected probability distribution and the original distribution given by the LLM is below a threshold. To find the optimal policy, we first show that the functional optimization problem can be simplified to a convex optimization problem with a finite number of variables. A closed-form solution for the optimal policy is then presented to this equivalent problem. It is remarkable that the optimal policy is deterministic and resembles water-filling in some cases. 
The solution suggests that usually adjusting the probability distribution for the state that has the least random transition probability should be prioritized, but the choice should be made by taking into account the transition probabilities at all states instead of only the current state. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01827v1-abstract-full').style.display = 'none'; document.getElementById('2502.01827v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.01689">arXiv:2502.01689</a> <span> [<a href="https://arxiv.org/pdf/2502.01689">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> scGSDR: Harnessing Gene Semantics for Single-Cell Pharmacological Profiling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yu-An Huang</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+X">Xiyue Cao</a>, <a href="/search/cs?searchtype=author&query=You%2C+Z">Zhu-Hong You</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yue-Chao Li</a>, <a href="/search/cs?searchtype=author&query=Shang%2C+X">Xuequn Shang</a>, <a 
href="/search/cs?searchtype=author&query=Huang%2C+Z">Zhi-An Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.01689v1-abstract-short" style="display: inline;"> The rise of single-cell sequencing technologies has revolutionized the exploration of drug resistance, revealing the crucial role of cellular heterogeneity in advancing precision medicine. By building computational models from existing single-cell drug response data, we can rapidly annotate cellular responses to drugs in subsequent trials. To this end, we developed scGSDR, a model that integrates… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01689v1-abstract-full').style.display = 'inline'; document.getElementById('2502.01689v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.01689v1-abstract-full" style="display: none;"> The rise of single-cell sequencing technologies has revolutionized the exploration of drug resistance, revealing the crucial role of cellular heterogeneity in advancing precision medicine. By building computational models from existing single-cell drug response data, we can rapidly annotate cellular responses to drugs in subsequent trials. To this end, we developed scGSDR, a model that integrates two computational pipelines grounded in the knowledge of cellular states and gene signaling pathways, both essential for understanding biological gene semantics. scGSDR enhances predictive performance by incorporating gene semantics and employs an interpretability module to identify key pathways contributing to drug resistance phenotypes. 
Our extensive validation, which included 16 experiments covering 11 drugs, demonstrates scGSDR's superior predictive accuracy, when trained with either bulk-seq or scRNA-seq data, achieving high AUROC, AUPR, and F1 Scores. The model's application has extended from single-drug predictions to scenarios involving drug combinations. Leveraging pathways of known drug target genes, we found that scGSDR's cell-pathway attention scores are biologically interpretable, which helped us identify other potential drug-related genes. Literature review of top-ranking genes in our predictions such as BCL2, CCND1, the AKT family, and PIK3CA for PLX4720; and ICAM1, VCAM1, NFKB1, NFKBIA, and RAC1 for Paclitaxel confirmed their relevance. In conclusion, scGSDR, by incorporating gene semantics, enhances predictive modeling of cellular responses to diverse drugs, proving invaluable for scenarios involving both single drug and combination therapies and effectively identifying key resistance-related pathways, thus advancing precision medicine and targeted therapy development. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01689v1-abstract-full').style.display = 'none'; document.getElementById('2502.01689v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.01637">arXiv:2502.01637</a> <span> [<a href="https://arxiv.org/pdf/2502.01637">pdf</a>, <a href="https://arxiv.org/format/2502.01637">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Scaling Embedding Layers in Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yu%2C+D">Da Yu</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+E">Edith Cohen</a>, <a href="/search/cs?searchtype=author&query=Ghazi%2C+B">Badih Ghazi</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yangsibo Huang</a>, <a href="/search/cs?searchtype=author&query=Kamath%2C+P">Pritish Kamath</a>, <a href="/search/cs?searchtype=author&query=Kumar%2C+R">Ravi Kumar</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+D">Daogao Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+C">Chiyuan Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.01637v1-abstract-short" style="display: inline;"> We propose SCONE ($\textbf{S}$calable, $\textbf{C}$ontextualized, $\textbf{O}$ffloaded, $\textbf{N}$-gram $\textbf{E}$mbedding), a method for extending input embedding layers to enhance language model performance as layer size scales. To avoid increased decoding costs, SCONE retains the original vocabulary while introducing embeddings for a set of frequent $n$-grams. 
These embeddings provide conte… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01637v1-abstract-full').style.display = 'inline'; document.getElementById('2502.01637v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.01637v1-abstract-full" style="display: none;"> We propose SCONE ($\textbf{S}$calable, $\textbf{C}$ontextualized, $\textbf{O}$ffloaded, $\textbf{N}$-gram $\textbf{E}$mbedding), a method for extending input embedding layers to enhance language model performance as layer size scales. To avoid increased decoding costs, SCONE retains the original vocabulary while introducing embeddings for a set of frequent $n$-grams. These embeddings provide contextualized representation for each input token and are learned with a separate model during training. During inference, they are precomputed and stored in off-accelerator memory with minimal impact on inference speed. SCONE enables two new scaling strategies: increasing the number of cached $n$-gram embeddings and scaling the model used to learn them, all while maintaining fixed inference-time FLOPS. We show that scaling both aspects allows SCONE to outperform a 1.9B parameter baseline across diverse corpora, while using only half the inference-time FLOPS. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01637v1-abstract-full').style.display = 'none'; document.getElementById('2502.01637v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.01609">arXiv:2502.01609</a> <span> [<a href="https://arxiv.org/pdf/2502.01609">pdf</a>, <a href="https://arxiv.org/format/2502.01609">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Breaking Focus: Contextual Distraction Curse in Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yue Huang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yanbo Wang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Z">Zixiang Xu</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+C">Chujie Gao</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+S">Siyuan Wu</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+J">Jiayi Ye</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xiuying Chen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+P">Pin-Yu Chen</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xiangliang Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.01609v1-abstract-short" style="display: inline;"> Recent advances in Large Language Models (LLMs) have revolutionized generative systems, achieving excellent performance across diverse domains. Although these models perform well in controlled environments, their real-world applications frequently encounter inputs containing both essential and irrelevant details. 
Our investigation has revealed a critical vulnerability in LLMs, which we term Contex… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01609v1-abstract-full').style.display = 'inline'; document.getElementById('2502.01609v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.01609v1-abstract-full" style="display: none;"> Recent advances in Large Language Models (LLMs) have revolutionized generative systems, achieving excellent performance across diverse domains. Although these models perform well in controlled environments, their real-world applications frequently encounter inputs containing both essential and irrelevant details. Our investigation has revealed a critical vulnerability in LLMs, which we term Contextual Distraction Vulnerability (CDV). This phenomenon arises when models fail to maintain consistent performance on questions modified with semantically coherent but irrelevant context. To systematically investigate this vulnerability, we propose an efficient tree-based search methodology to automatically generate CDV examples. Our approach successfully generates CDV examples across four datasets, causing an average performance degradation of approximately 45% in state-of-the-art LLMs. To address this critical issue, we explore various mitigation strategies and find that post-targeted training approaches can effectively enhance model robustness against contextual distractions. Our findings highlight the fundamental nature of CDV as an ability-level challenge rather than a knowledge-level issue since models demonstrate the necessary knowledge by answering correctly in the absence of distractions. This calls the community's attention to address CDV during model development to ensure reliability. The code is available at https://github.com/wyf23187/LLM_CDV. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01609v1-abstract-full').style.display = 'none'; document.getElementById('2502.01609v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.01534">arXiv:2502.01534</a> <span> [<a href="https://arxiv.org/pdf/2502.01534">pdf</a>, <a href="https://arxiv.org/format/2502.01534">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Preference Leakage: A Contamination Problem in LLM-as-a-judge </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+D">Dawei Li</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+R">Renliang Sun</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yue Huang</a>, <a href="/search/cs?searchtype=author&query=Zhong%2C+M">Ming Zhong</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+B">Bohan Jiang</a>, <a href="/search/cs?searchtype=author&query=Han%2C+J">Jiawei Han</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xiangliang Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+W">Wei Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+H">Huan Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.01534v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) as judges and LLM-based data synthesis have emerged as two fundamental LLM-driven data annotation methods in model development. While their combination significantly enhances the efficiency of model training and evaluation, little attention has been given to the potential contamination brought by this new model development paradigm. In this work, we expose preference l… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01534v1-abstract-full').style.display = 'inline'; document.getElementById('2502.01534v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.01534v1-abstract-full" style="display: none;"> Large Language Models (LLMs) as judges and LLM-based data synthesis have emerged as two fundamental LLM-driven data annotation methods in model development. While their combination significantly enhances the efficiency of model training and evaluation, little attention has been given to the potential contamination brought by this new model development paradigm. In this work, we expose preference leakage, a contamination problem in LLM-as-a-judge caused by the relatedness between the synthetic data generators and LLM-based evaluators. To study this issue, we first define three common relatednesses between data generator LLM and judge LLM: being the same model, having an inheritance relationship, and belonging to the same model family. Through extensive experiments, we empirically confirm the bias of judges towards their related student models caused by preference leakage across multiple LLM baselines and benchmarks. 
Further analysis suggests that preference leakage is a pervasive issue that is harder to detect compared to previously identified biases in LLM-as-a-judge scenarios. All of these findings imply that preference leakage is a widespread and challenging problem in the area of LLM-as-a-judge. We release all codes and data at: https://github.com/David-Li0406/Preference-Leakage. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01534v1-abstract-full').style.display = 'none'; document.getElementById('2502.01534v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 8 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.01325">arXiv:2502.01325</a> <span> [<a href="https://arxiv.org/pdf/2502.01325">pdf</a>, <a href="https://arxiv.org/format/2502.01325">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> The Homework Wars: Exploring Emotions, Behaviours, and Conflicts in Parent-Child Homework Interactions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gao%2C+N">Nan Gao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yibin Liu</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+X">Xin Tang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yanyan Liu</a>, <a 
href="/search/cs?searchtype=author&query=Yu%2C+C">Chun Yu</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yun Huang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yuntao Wang</a>, <a href="/search/cs?searchtype=author&query=Salim%2C+F+D">Flora D. Salim</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+X+O">Xuhai Orson Xu</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+J">Jun Wei</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+Y">Yuanchun Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.01325v2-abstract-short" style="display: inline;"> Parental involvement in homework is a crucial aspect of family education, but it often leads to emotional strain and conflicts that can severely impact family well-being. This paper presents findings from a 4-week in situ study involving 78 families in China, where we collected and analyzed 602 valid audio recordings (totalling 475 hours) and daily surveys. Leveraging large language models (LLMs)… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01325v2-abstract-full').style.display = 'inline'; document.getElementById('2502.01325v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.01325v2-abstract-full" style="display: none;"> Parental involvement in homework is a crucial aspect of family education, but it often leads to emotional strain and conflicts that can severely impact family well-being. This paper presents findings from a 4-week in situ study involving 78 families in China, where we collected and analyzed 602 valid audio recordings (totalling 475 hours) and daily surveys. 
Leveraging large language models (LLMs) to analyze parent-child conversations, we gained a nuanced understanding of emotional and behavioural dynamics that overcomes the limitations of traditional one-time surveys and interviews. Our findings reveal significant emotional shifts in parents before and after homework involvement and summarise a range of positive, neutral and negative parental behaviours. We also catalogue seven common conflicts, with Knowledge Conflict being the most frequent. Notably, we found that even well-intentioned parental behaviours, such as Unlabelled Praise, were significantly positively correlated with specific conflict types. This work advances ubiquitous computing's research to sense and understand complex family dynamics, while offering evidence-based insights for designing future ambient intelligent systems to support healthy family education environments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01325v2-abstract-full').style.display = 'none'; document.getElementById('2502.01325v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00859">arXiv:2502.00859</a> <span> [<a href="https://arxiv.org/pdf/2502.00859">pdf</a>, <a href="https://arxiv.org/format/2502.00859">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> FedRIR: Rethinking Information Representation in Federated Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yongqiang Huang</a>, <a href="/search/cs?searchtype=author&query=Shao%2C+Z">Zerui Shao</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Z">Ziyuan Yang</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+Z">Zexin Lu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yi Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00859v1-abstract-short" style="display: inline;"> Mobile and Web-of-Things (WoT) devices at the network edge generate vast amounts of data for machine learning applications, yet privacy concerns hinder centralized model training. 
Federated Learning (FL) allows clients (devices) to collaboratively train a shared model coordinated by a central server without transfer private data, but inherent statistical heterogeneity among clients presents challe… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00859v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00859v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00859v1-abstract-full" style="display: none;"> Mobile and Web-of-Things (WoT) devices at the network edge generate vast amounts of data for machine learning applications, yet privacy concerns hinder centralized model training. Federated Learning (FL) allows clients (devices) to collaboratively train a shared model coordinated by a central server without transfer private data, but inherent statistical heterogeneity among clients presents challenges, often leading to a dilemma between clients' needs for personalized local models and the server's goal of building a generalized global model. Existing FL methods typically prioritize either global generalization or local personalization, resulting in a trade-off between these two objectives and limiting the full potential of diverse client data. To address this challenge, we propose a novel framework that simultaneously enhances global generalization and local personalization by Rethinking Information Representation in the Federated learning process (FedRIR). Specifically, we introduce Masked Client-Specific Learning (MCSL), which isolates and extracts fine-grained client-specific features tailored to each client's unique data characteristics, thereby enhancing personalization. 
Concurrently, the Information Distillation Module (IDM) refines the global shared features by filtering out redundant client-specific information, resulting in a purer and more robust global representation that enhances generalization. By integrating the refined global features with the isolated client-specific features, we construct enriched representations that effectively capture both global patterns and local nuances, thereby improving the performance of downstream tasks on the client. The code is available at https://github.com/Deep-Imaging-Group/FedRIR. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00859v1-abstract-full').style.display = 'none'; document.getElementById('2502.00859v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00015">arXiv:2502.00015</a> <span> [<a href="https://arxiv.org/pdf/2502.00015">pdf</a>, <a href="https://arxiv.org/format/2502.00015">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Ethical Concerns of Generative AI and Mitigation Strategies: A Systematic Mapping Study </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yutan Huang</a>, <a href="/search/cs?searchtype=author&query=Arora%2C+C">Chetan Arora</a>, <a href="/search/cs?searchtype=author&query=Houng%2C+W+C">Wen Cheng Houng</a>, <a href="/search/cs?searchtype=author&query=Kanij%2C+T">Tanjila Kanij</a>, <a href="/search/cs?searchtype=author&query=Madulgalla%2C+A">Anuradha Madulgalla</a>, <a href="/search/cs?searchtype=author&query=Grundy%2C+J">John Grundy</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00015v1-abstract-short" style="display: inline;"> [Context] Generative AI technologies, particularly Large Language Models (LLMs), have transformed numerous domains by enhancing convenience and efficiency in information retrieval, content generation, and decision-making processes. However, deploying LLMs also presents diverse ethical challenges, and their mitigation strategies remain complex and domain-dependent. 
[Objective] This paper aims to id… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00015v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00015v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00015v1-abstract-full" style="display: none;"> [Context] Generative AI technologies, particularly Large Language Models (LLMs), have transformed numerous domains by enhancing convenience and efficiency in information retrieval, content generation, and decision-making processes. However, deploying LLMs also presents diverse ethical challenges, and their mitigation strategies remain complex and domain-dependent. [Objective] This paper aims to identify and categorize the key ethical concerns associated with using LLMs, examine existing mitigation strategies, and assess the outstanding challenges in implementing these strategies across various domains. [Method] We conducted a systematic mapping study, reviewing 39 studies that discuss ethical concerns and mitigation strategies related to LLMs. We analyzed these ethical concerns using five ethical dimensions that we extracted based on various existing guidelines, frameworks, and an analysis of the mitigation strategies and implementation challenges. [Results] Our findings reveal that ethical concerns in LLMs are multi-dimensional and context-dependent. While proposed mitigation strategies address some of these concerns, significant challenges still remain. [Conclusion] Our results highlight that ethical issues often hinder the practical implementation of the mitigation strategies, particularly in high-stake areas like healthcare and public governance; existing frameworks often lack adaptability, failing to accommodate evolving societal expectations and diverse contexts. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00015v1-abstract-full').style.display = 'none'; document.getElementById('2502.00015v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.19401">arXiv:2501.19401</a> <span> [<a href="https://arxiv.org/pdf/2501.19401">pdf</a>, <a href="https://arxiv.org/format/2501.19401">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Detection Is All You Need: A Feasible Optimal Prior-Free Black-Box Approach For Piecewise Stationary Bandits </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gerogiannis%2C+A">Argyrios Gerogiannis</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yu-Han Huang</a>, <a href="/search/cs?searchtype=author&query=Bose%2C+S">Subhonmesh Bose</a>, <a href="/search/cs?searchtype=author&query=Veeravalli%2C+V+V">Venugopal V. Veeravalli</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.19401v1-abstract-short" style="display: inline;"> We study the problem of piecewise stationary bandits without prior knowledge of the underlying non-stationarity. We propose the first $\textit{feasible}$ black-box algorithm applicable to most common parametric bandit variants. 
Our procedure, termed Detection Augmented Bandit (DAB), is modular, accepting any stationary bandit algorithm as input and augmenting it with a change detector. DAB achieve… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.19401v1-abstract-full').style.display = 'inline'; document.getElementById('2501.19401v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.19401v1-abstract-full" style="display: none;"> We study the problem of piecewise stationary bandits without prior knowledge of the underlying non-stationarity. We propose the first $\textit{feasible}$ black-box algorithm applicable to most common parametric bandit variants. Our procedure, termed Detection Augmented Bandit (DAB), is modular, accepting any stationary bandit algorithm as input and augmenting it with a change detector. DAB achieves optimal regret in the piecewise stationary setting under mild assumptions. Specifically, we prove that DAB attains the order-optimal regret bound of $\tilde{\mathcal{O}}(\sqrt{N_T T})$, where $N_T$ denotes the number of changes over the horizon $T$, if its input stationary bandit algorithm has order-optimal stationary regret guarantees. Applying DAB to different parametric bandit settings, we recover recent state-of-the-art results. Notably, for self-concordant bandits, DAB achieves optimal dynamic regret, while previous works obtain suboptimal bounds and require knowledge on the non-stationarity. In simulations on piecewise stationary environments, DAB outperforms existing approaches across varying number of changes. Interestingly, despite being theoretically designed for piecewise stationary environments, DAB is also effective in simulations in drifting environments, outperforming existing methods designed specifically for this scenario. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.19401v1-abstract-full').style.display = 'none'; document.getElementById('2501.19401v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.19319">arXiv:2501.19319</a> <span> [<a href="https://arxiv.org/pdf/2501.19319">pdf</a>, <a href="https://arxiv.org/format/2501.19319">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Advancing Dense Endoscopic Reconstruction with Gaussian Splatting-driven Surface Normal-aware Tracking and Mapping </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yiming Huang</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+B">Beilei Cui</a>, <a href="/search/cs?searchtype=author&query=Bai%2C+L">Long Bai</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zhen Chen</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+J">Jinlin Wu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhen Li</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+H">Hongbin Liu</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+H">Hongliang Ren</a> </p> <p class="abstract 
mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.19319v1-abstract-short" style="display: inline;"> Simultaneous Localization and Mapping (SLAM) is essential for precise surgical interventions and robotic tasks in minimally invasive procedures. While recent advancements in 3D Gaussian Splatting (3DGS) have improved SLAM with high-quality novel view synthesis and fast rendering, these systems struggle with accurate depth and surface reconstruction due to multi-view inconsistencies. Simply incorpo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.19319v1-abstract-full').style.display = 'inline'; document.getElementById('2501.19319v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.19319v1-abstract-full" style="display: none;"> Simultaneous Localization and Mapping (SLAM) is essential for precise surgical interventions and robotic tasks in minimally invasive procedures. While recent advancements in 3D Gaussian Splatting (3DGS) have improved SLAM with high-quality novel view synthesis and fast rendering, these systems struggle with accurate depth and surface reconstruction due to multi-view inconsistencies. Simply incorporating SLAM and 3DGS leads to mismatches between the reconstructed frames. In this work, we present Endo-2DTAM, a real-time endoscopic SLAM system with 2D Gaussian Splatting (2DGS) to address these challenges. Endo-2DTAM incorporates a surface normal-aware pipeline, which consists of tracking, mapping, and bundle adjustment modules for geometrically accurate reconstruction. Our robust tracking module combines point-to-point and point-to-plane distance metrics, while the mapping module utilizes normal consistency and depth distortion to enhance surface reconstruction quality. 
We also introduce a pose-consistent strategy for efficient and geometrically coherent keyframe sampling. Extensive experiments on public endoscopic datasets demonstrate that Endo-2DTAM achieves an RMSE of $1.87\pm 0.63$ mm for depth reconstruction of surgical scenes while maintaining computationally efficient tracking, high-quality visual appearance, and real-time rendering. Our code will be released at github.com/lastbasket/Endo-2DTAM. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.19319v1-abstract-full').style.display = 'none'; document.getElementById('2501.19319v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ICRA 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.19159">arXiv:2501.19159</a> <span> [<a href="https://arxiv.org/pdf/2501.19159">pdf</a>, <a href="https://arxiv.org/format/2501.19159">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> GDO: Gradual Domain Osmosis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zixi Wang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yubo Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" 
id="2501.19159v1-abstract-short" style="display: inline;"> In this paper, we propose a new method called Gradual Domain Osmosis, which aims to solve the problem of smooth knowledge migration from source domain to target domain in Gradual Domain Adaptation (GDA). Traditional Gradual Domain Adaptation methods mitigate domain bias by introducing intermediate domains and self-training strategies, but often face the challenges of inefficient knowledge migratio… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.19159v1-abstract-full').style.display = 'inline'; document.getElementById('2501.19159v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.19159v1-abstract-full" style="display: none;"> In this paper, we propose a new method called Gradual Domain Osmosis, which aims to solve the problem of smooth knowledge migration from source domain to target domain in Gradual Domain Adaptation (GDA). Traditional Gradual Domain Adaptation methods mitigate domain bias by introducing intermediate domains and self-training strategies, but often face the challenges of inefficient knowledge migration or missing data in intermediate domains. In this paper, we design an optimisation framework based on the hyperparameter $\lambda$ by dynamically balancing the loss weights of the source and target domains, which enables the model to progressively adjust the strength of knowledge migration ($\lambda$ incrementing from 0 to 1) during the training process, thus achieving cross-domain generalisation more efficiently. Specifically, the method incorporates self-training to generate pseudo-labels and iteratively updates the model by minimising a weighted loss function to ensure stability and robustness during progressive adaptation in the intermediate domain. 
The experimental part validates the effectiveness of the method on rotated MNIST, colour-shifted MNIST, portrait dataset and forest cover type dataset, and the results show that it outperforms existing baseline methods. The paper further analyses the impact of the dynamic tuning strategy of the hyperparameter $\lambda$ on the performance through ablation experiments, confirming the advantages of progressive domain penetration in mitigating the domain bias and enhancing the model generalisation capability. The study provides a theoretical support and practical framework for asymptotic domain adaptation and expands its application potential in dynamic environments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.19159v1-abstract-full').style.display = 'none'; document.getElementById('2501.19159v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">submitted to icml 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.19155">arXiv:2501.19155</a> <span> [<a href="https://arxiv.org/pdf/2501.19155">pdf</a>, <a href="https://arxiv.org/format/2501.19155">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> SWAT: Sliding Window Adversarial Training for Gradual Domain Adaptation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zixi Wang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yubo Huang</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+W">Wenwei Luo</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tonglan Xie</a>, <a href="/search/cs?searchtype=author&query=Jing%2C+M">Mengmeng Jing</a>, <a href="/search/cs?searchtype=author&query=Zuo%2C+L">Lin Zuo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.19155v1-abstract-short" style="display: inline;"> Domain shifts are critical issues that harm the performance of machine learning. Unsupervised Domain Adaptation (UDA) mitigates this issue but suffers when the domain shifts are steep and drastic. Gradual Domain Adaptation (GDA) alleviates this problem in a mild way by gradually adapting from the source to the target domain using multiple intermediate domains. 
In this paper, we propose Sliding Win… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.19155v1-abstract-full').style.display = 'inline'; document.getElementById('2501.19155v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.19155v1-abstract-full" style="display: none;"> Domain shifts are critical issues that harm the performance of machine learning. Unsupervised Domain Adaptation (UDA) mitigates this issue but suffers when the domain shifts are steep and drastic. Gradual Domain Adaptation (GDA) alleviates this problem in a mild way by gradually adapting from the source to the target domain using multiple intermediate domains. In this paper, we propose Sliding Window Adversarial Training (SWAT) for Gradual Domain Adaptation. SWAT uses the construction of adversarial streams to connect the feature spaces of the source and target domains. In order to gradually narrow the small gap between adjacent intermediate domains, a sliding window paradigm is designed that moves along the adversarial stream. When the window moves to the end of the stream, i.e., the target domain, the domain shift is drastically reduced. Extensive experiments are conducted on public GDA benchmarks, and the results demonstrate that the proposed SWAT significantly outperforms the state-of-the-art approaches. The implementation is available at: https://anonymous.4open.science/r/SWAT-8677. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.19155v1-abstract-full').style.display = 'none'; document.getElementById('2501.19155v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">submitted to icml 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.18914">arXiv:2501.18914</a> <span> [<a href="https://arxiv.org/pdf/2501.18914">pdf</a>, <a href="https://arxiv.org/format/2501.18914">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Scaling Laws for Differentially Private Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=McKenna%2C+R">Ryan McKenna</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yangsibo Huang</a>, <a href="/search/cs?searchtype=author&query=Sinha%2C+A">Amer Sinha</a>, <a href="/search/cs?searchtype=author&query=Balle%2C+B">Borja Balle</a>, <a href="/search/cs?searchtype=author&query=Charles%2C+Z">Zachary Charles</a>, <a href="/search/cs?searchtype=author&query=Choquette-Choo%2C+C+A">Christopher A. 
Choquette-Choo</a>, <a href="/search/cs?searchtype=author&query=Ghazi%2C+B">Badih Ghazi</a>, <a href="/search/cs?searchtype=author&query=Kaissis%2C+G">George Kaissis</a>, <a href="/search/cs?searchtype=author&query=Kumar%2C+R">Ravi Kumar</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+R">Ruibo Liu</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+D">Da Yu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+C">Chiyuan Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.18914v1-abstract-short" style="display: inline;"> Scaling laws have emerged as important components of large language model (LLM) training as they can predict performance gains through scale, and provide guidance on important hyper-parameter choices that would otherwise be expensive. LLMs also rely on large, high-quality training datasets, like those sourced from (sometimes sensitive) user data. Training models on this sensitive user data require… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18914v1-abstract-full').style.display = 'inline'; document.getElementById('2501.18914v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.18914v1-abstract-full" style="display: none;"> Scaling laws have emerged as important components of large language model (LLM) training as they can predict performance gains through scale, and provide guidance on important hyper-parameter choices that would otherwise be expensive. LLMs also rely on large, high-quality training datasets, like those sourced from (sometimes sensitive) user data. Training models on this sensitive user data requires careful privacy protections like differential privacy (DP). 
However, the dynamics of DP training are significantly different, and consequently their scaling laws are not yet fully understood. In this work, we establish scaling laws that accurately model the intricacies of DP LLM training, providing a complete picture of the compute-privacy-utility tradeoffs and the optimal training configurations in many settings. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18914v1-abstract-full').style.display = 'none'; document.getElementById('2501.18914v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.18716">arXiv:2501.18716</a> <span> [<a href="https://arxiv.org/pdf/2501.18716">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neurons and Cognition">q-bio.NC</span> </div> </div> <p class="title is-5 mathjax"> Full-Head Segmentation of MRI with Abnormal Brain Anatomy: Model and Data Release </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Birnbaum%2C+A+M">Andrew M Birnbaum</a>, <a href="/search/cs?searchtype=author&query=Buchwald%2C+A">Adam Buchwald</a>, <a href="/search/cs?searchtype=author&query=Turkeltaub%2C+P">Peter Turkeltaub</a>, <a 
href="/search/cs?searchtype=author&query=Jacks%2C+A">Adam Jacks</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yu Huang</a>, <a href="/search/cs?searchtype=author&query=Datta%2C+A">Abhisheck Datta</a>, <a href="/search/cs?searchtype=author&query=Parra%2C+L+C">Lucas C Parra</a>, <a href="/search/cs?searchtype=author&query=Hirsch%2C+L+A">Lukas A Hirsch</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.18716v1-abstract-short" style="display: inline;"> The goal of this work was to develop a deep network for whole-head segmentation, including clinical MRIs with abnormal anatomy, and compile the first public benchmark dataset for this purpose. We collected 91 MRIs with volumetric segmentation labels for a diverse set of human subjects (4 normal, 32 traumatic brain injuries, and 57 strokes). These clinical cases are characterized by extended cerebr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18716v1-abstract-full').style.display = 'inline'; document.getElementById('2501.18716v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.18716v1-abstract-full" style="display: none;"> The goal of this work was to develop a deep network for whole-head segmentation, including clinical MRIs with abnormal anatomy, and compile the first public benchmark dataset for this purpose. We collected 91 MRIs with volumetric segmentation labels for a diverse set of human subjects (4 normal, 32 traumatic brain injuries, and 57 strokes). These clinical cases are characterized by extended cerebrospinal fluid (CSF) in regions normally containing the brain. Training labels were generated by manually correcting initial automated segmentations for skin/scalp, skull, CSF, gray matter, white matter, air cavity, and extracephalic air. 
We developed a MultiAxial network consisting of three 2D U-Net models that operate independently in sagittal, axial, and coronal planes and are then combined to produce a single 3D segmentation. The MultiAxial network achieved test-set Dice scores of 0.88 (median plus-minus 0.04). For brain tissue, it significantly outperforms existing brain segmentation methods (MultiAxial: 0.898 plus-minus 0.041, SynthSeg: 0.758 plus-minus 0.054, BrainChop: 0.757 plus-minus 0.125). The MultiAxial network gains in robustness by avoiding the need for coregistration with an atlas. It performed well in regions with abnormal anatomy and on images that have been de-identified. It enables more robust current flow modeling when incorporated into ROAST, a widely-used modeling toolbox for transcranial electric stimulation. We are releasing a state-of-the-art model for whole-head MRI segmentation, along with a dataset of 61 clinical MRIs and training labels, including non-brain structures. Together, the model and data may serve as a benchmark for future efforts. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18716v1-abstract-full').style.display = 'none'; document.getElementById('2501.18716v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.18642">arXiv:2501.18642</a> <span> [<a href="https://arxiv.org/pdf/2501.18642">pdf</a>, <a href="https://arxiv.org/format/2501.18642">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> DebiasPI: Inference-time Debiasing by Prompt Iteration of a Text-to-Image Generative Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Bonna%2C+S">Sarah Bonna</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yu-Cheng Huang</a>, <a href="/search/cs?searchtype=author&query=Novozhilova%2C+E">Ekaterina Novozhilova</a>, <a href="/search/cs?searchtype=author&query=Paik%2C+S">Sejin Paik</a>, <a href="/search/cs?searchtype=author&query=Shan%2C+Z">Zhengyang Shan</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+M+Y">Michelle Yilin Feng</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+G">Ge Gao</a>, <a href="/search/cs?searchtype=author&query=Tayal%2C+Y">Yonish Tayal</a>, <a href="/search/cs?searchtype=author&query=Kulkarni%2C+R">Rushil Kulkarni</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+J">Jialin Yu</a>, <a href="/search/cs?searchtype=author&query=Divekar%2C+N">Nupur Divekar</a>, <a href="/search/cs?searchtype=author&query=Ghadiyaram%2C+D">Deepti Ghadiyaram</a>, <a 
href="/search/cs?searchtype=author&query=Wijaya%2C+D">Derry Wijaya</a>, <a href="/search/cs?searchtype=author&query=Betke%2C+M">Margrit Betke</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.18642v1-abstract-short" style="display: inline;"> Ethical intervention prompting has emerged as a tool to counter demographic biases of text-to-image generative AI models. Existing solutions either require to retrain the model or struggle to generate images that reflect desired distributions on gender and race. We propose an inference-time process called DebiasPI for Debiasing-by-Prompt-Iteration that provides prompt intervention by enabling the… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18642v1-abstract-full').style.display = 'inline'; document.getElementById('2501.18642v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.18642v1-abstract-full" style="display: none;"> Ethical intervention prompting has emerged as a tool to counter demographic biases of text-to-image generative AI models. Existing solutions either require to retrain the model or struggle to generate images that reflect desired distributions on gender and race. We propose an inference-time process called DebiasPI for Debiasing-by-Prompt-Iteration that provides prompt intervention by enabling the user to control the distributions of individuals' demographic attributes in image generation. DebiasPI keeps track of which attributes have been generated either by probing the internal state of the model or by using external attribute classifiers. 
Its control loop guides the text-to-image model to select not yet sufficiently represented attributes. With DebiasPI, we were able to create images with equal representations of race and gender that visualize challenging concepts of news headlines. We also experimented with the attributes age, body type, profession, and skin tone, and measured how attributes change when our intervention prompt targets the distribution of an unrelated attribute type. We found, for example, if the text-to-image model is asked to balance racial representation, gender representation improves but the skin tone becomes less diverse. Attempts to cover a wide range of skin colors with various intervention prompts showed that the model struggles to generate the palest skin tones. We conducted various ablation studies, in which we removed DebiasPI's attribute control, that reveal the model's propensity to generate young, male characters. It sometimes visualized career success by generating two-panel images with a pre-success dark-skinned person becoming light-skinned with success, or switching gender from pre-success female to post-success male, thus further motivating ethical intervention prompting with DebiasPI. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18642v1-abstract-full').style.display = 'none'; document.getElementById('2501.18642v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This work was presented at The European Conference on Computer Vision (ECCV) 2024 Workshop "Fairness and ethics towards transparent AI: facing the chalLEnge through model Debiasing" (FAILED), Milano, Italy, on September 29, 2024, https://failed-workshop-eccv-2024.github.io</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.18614">arXiv:2501.18614</a> <span> [<a href="https://arxiv.org/pdf/2501.18614">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Review and Recommendations for using Artificial Intelligence in Intracoronary Optical Coherence Tomography Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xu Chen</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yuan Huang</a>, <a href="/search/cs?searchtype=author&query=Jessney%2C+B">Benn Jessney</a>, <a href="/search/cs?searchtype=author&query=Sangha%2C+J">Jason Sangha</a>, <a href="/search/cs?searchtype=author&query=Gu%2C+S">Sophie Gu</a>, <a href="/search/cs?searchtype=author&query=Sch%C3%B6nlieb%2C+C">Carola-Bibiane Schönlieb</a>, <a href="/search/cs?searchtype=author&query=Bennett%2C+M">Martin Bennett</a>, <a href="/search/cs?searchtype=author&query=Roberts%2C+M">Michael Roberts</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.18614v1-abstract-short" style="display: inline;"> Artificial intelligence (AI) methodologies hold great promise for the rapid and accurate diagnosis of coronary artery disease (CAD) from intravascular optical coherent tomography (IVOCT) images. Numerous papers have been published describing AI-based models for different diagnostic tasks, yet it remains unclear which models have potential clinical utility and have been properly validated. This sys… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18614v1-abstract-full').style.display = 'inline'; document.getElementById('2501.18614v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.18614v1-abstract-full" style="display: none;"> Artificial intelligence (AI) methodologies hold great promise for the rapid and accurate diagnosis of coronary artery disease (CAD) from intravascular optical coherent tomography (IVOCT) images. Numerous papers have been published describing AI-based models for different diagnostic tasks, yet it remains unclear which models have potential clinical utility and have been properly validated. This systematic review considered published literature between January 2015 and February 2023 describing AI-based diagnosis of CAD using IVOCT. Our search identified 5,576 studies, with 513 included after initial screening and 35 studies included in the final systematic review after quality screening. Our findings indicate that most of the identified models are not currently suitable for clinical use, primarily due to methodological flaws and underlying biases. To address these issues, we provide recommendations to improve model quality and research practices to enhance the development of clinically useful AI products. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18614v1-abstract-full').style.display = 'none'; document.getElementById('2501.18614v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.17412">arXiv:2501.17412</a> <span> [<a href="https://arxiv.org/pdf/2501.17412">pdf</a>, <a href="https://arxiv.org/ps/2501.17412">ps</a>, <a href="https://arxiv.org/format/2501.17412">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Randomized Scheduling for Periodic Multi-Source Systems with PAoI Violation Guarantees </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lin%2C+K">Kuan-Yu Lin</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+W">Wei-Lun Lu</a>, <a href="/search/cs?searchtype=author&query=Hsu%2C+Y">Yu-Pin Hsu</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yu-Chih Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.17412v1-abstract-short" style="display: inline;"> The Age of Information (AoI) has been recognized as a critical metric for assessing the freshness of information in modern communication systems. In this work, we examine an information update system where multiple information sources transmit updates to their respective destinations via a shared base station. 
Our main contribution is the proposal of a randomized scheduling algorithm that offers d… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.17412v1-abstract-full').style.display = 'inline'; document.getElementById('2501.17412v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.17412v1-abstract-full" style="display: none;"> The Age of Information (AoI) has been recognized as a critical metric for assessing the freshness of information in modern communication systems. In this work, we examine an information update system where multiple information sources transmit updates to their respective destinations via a shared base station. Our main contribution is the proposal of a randomized scheduling algorithm that offers distinct statistical AoI guarantees for heterogeneous sources. Specifically, we rigorously derive an analytical upper bound on peak age of information (PAoI) violation probability by leveraging properties of the multivariate noncentral hypergeometric Wallenius distribution. Building on these analytical results, two designs of coefficients for the randomized policy are proposed to meet the outage constraints for all sources, tailored to the long and short sampling delay cases, respectively. Simulation results demonstrate the accuracy of our analysis on PAoI violation probability and also show that our proposed design always provides a feasible solution in most cases. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.17412v1-abstract-full').style.display = 'none'; document.getElementById('2501.17412v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Huang%2C+Y&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Huang%2C+Y&start=0" class="pagination-link is-current" aria-label="Page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Huang%2C+Y&start=50" class="pagination-link " aria-label="Goto page 2">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Huang%2C+Y&start=100" class="pagination-link " aria-label="Goto page 3">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Huang%2C+Y&start=150" class="pagination-link " aria-label="Goto page 4">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Huang%2C+Y&start=200" class="pagination-link " aria-label="Goto page 5">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 
15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon 
filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository