Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/>  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b">  <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\$","\$"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a>  <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div>  <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div>  <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 304 results for author: <span class="mathjax">Yao, X</span> </h1> </div> <div class="level-right is-hidden-mobile">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Yao%2C+X">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Yao, X"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Yao%2C+X&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Yao, X"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Yao%2C+X&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Yao%2C+X&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Yao%2C+X&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Yao%2C+X&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Yao%2C+X&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Yao%2C+X&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li> <a href="/search/?searchtype=author&query=Yao%2C+X&start=250" class="pagination-link " aria-label="Page 6" aria-current="page">6 </a> </li> <li> <a href="/search/?searchtype=author&query=Yao%2C+X&start=300" class="pagination-link " aria-label="Page 7" aria-current="page">7 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.19285">arXiv:2503.19285</a> <span> [<a href="https://arxiv.org/pdf/2503.19285">pdf</a>, <a href="https://arxiv.org/format/2503.19285">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> No Black Box Anymore: Demystifying Clinical Predictive Modeling with Temporal-Feature Cross Attention Mechanism </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yubo Li</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xinyu Yao</a>, <a href="/search/cs?searchtype=author&query=Padman%2C+R">Rema Padman</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.19285v2-abstract-short" style="display: inline;"> Despite the outstanding performance of deep learning models in clinical prediction tasks, explainability remains a significant challenge. Inspired by transformer architectures, we introduce the Temporal-Feature Cross Attention Mechanism (TFCAM), a novel deep learning framework designed to capture dynamic interactions among clinical features across time, enhancing both predictive accuracy and inter… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.19285v2-abstract-full').style.display = 'inline'; document.getElementById('2503.19285v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.19285v2-abstract-full" style="display: none;"> Despite the outstanding performance of deep learning models in clinical prediction tasks, explainability remains a significant challenge. Inspired by transformer architectures, we introduce the Temporal-Feature Cross Attention Mechanism (TFCAM), a novel deep learning framework designed to capture dynamic interactions among clinical features across time, enhancing both predictive accuracy and interpretability. In an experiment with 1,422 patients with Chronic Kidney Disease, predicting progression to End-Stage Renal Disease, TFCAM outperformed LSTM and RETAIN baselines, achieving an AUROC of 0.95 and an F1-score of 0.69. Beyond performance gains, TFCAM provides multi-level explainability by identifying critical temporal periods, ranking feature importance, and quantifying how features influence each other across time before affecting predictions. Our approach addresses the "black box" limitations of deep learning in healthcare, offering clinicians transparent insights into disease progression mechanisms while maintaining state-of-the-art predictive performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.19285v2-abstract-full').style.display = 'none'; document.getElementById('2503.19285v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 3 figures, submitted to AMIA 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.18730">arXiv:2503.18730</a> <span> [<a href="https://arxiv.org/pdf/2503.18730">pdf</a>, <a href="https://arxiv.org/format/2503.18730">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Predicting the Road Ahead: A Knowledge Graph based Foundation Model for Scene Understanding in Autonomous Driving </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhou%2C+H">Hongkuan Zhou</a>, <a href="/search/cs?searchtype=author&query=Schmid%2C+S">Stefan Schmid</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yicong Li</a>, <a href="/search/cs?searchtype=author&query=Halilaj%2C+L">Lavdim Halilaj</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xiangtong Yao</a>, <a href="/search/cs?searchtype=author&query=cao%2C+W">Wei cao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.18730v1-abstract-short" style="display: inline;"> The autonomous driving field has seen remarkable advancements in various topics, such as object recognition, trajectory prediction, and motion planning. However, current approaches face limitations in effectively comprehending the complex evolutions of driving scenes over time. This paper proposes FM4SU, a novel methodology for training a symbolic foundation model (FM) for scene understanding in a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.18730v1-abstract-full').style.display = 'inline'; document.getElementById('2503.18730v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.18730v1-abstract-full" style="display: none;"> The autonomous driving field has seen remarkable advancements in various topics, such as object recognition, trajectory prediction, and motion planning. However, current approaches face limitations in effectively comprehending the complex evolutions of driving scenes over time. This paper proposes FM4SU, a novel methodology for training a symbolic foundation model (FM) for scene understanding in autonomous driving. It leverages knowledge graphs (KGs) to capture sensory observation along with domain knowledge such as road topology, traffic rules, or complex interactions between traffic participants. A bird's eye view (BEV) symbolic representation is extracted from the KG for each driving scene, including the spatio-temporal information among the objects across the scenes. The BEV representation is serialized into a sequence of tokens and given to pre-trained language models (PLMs) for learning an inherent understanding of the co-occurrence among driving scene elements and generating predictions on the next scenes. We conducted a number of experiments using the nuScenes dataset and KG in various scenarios. The results demonstrate that fine-tuned models achieve significantly higher accuracy in all tasks. The fine-tuned T5 model achieved a next scene prediction accuracy of 86.7%. This paper concludes that FM4SU offers a promising foundation for developing more comprehensive models for scene understanding in autonomous driving. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.18730v1-abstract-full').style.display = 'none'; document.getElementById('2503.18730v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.06227">arXiv:2503.06227</a> <span> [<a href="https://arxiv.org/pdf/2503.06227">pdf</a>, <a href="https://arxiv.org/format/2503.06227">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> GAT-Grasp: Gesture-Driven Affordance Transfer for Task-Aware Robotic Grasping </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+R">Ruixiang Wang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+H">Huayi Zhou</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xinyue Yao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Guiliang Liu</a>, <a href="/search/cs?searchtype=author&query=Jia%2C+K">Kui Jia</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.06227v1-abstract-short" style="display: inline;"> Achieving precise and generalizable grasping across diverse objects and environments is essential for intelligent and collaborative robotic systems. However, existing approaches often struggle with ambiguous affordance reasoning and limited adaptability to unseen objects, leading to suboptimal grasp execution. In this work, we propose GAT-Grasp, a gesture-driven grasping framework that directly ut… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.06227v1-abstract-full').style.display = 'inline'; document.getElementById('2503.06227v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.06227v1-abstract-full" style="display: none;"> Achieving precise and generalizable grasping across diverse objects and environments is essential for intelligent and collaborative robotic systems. However, existing approaches often struggle with ambiguous affordance reasoning and limited adaptability to unseen objects, leading to suboptimal grasp execution. In this work, we propose GAT-Grasp, a gesture-driven grasping framework that directly utilizes human hand gestures to guide the generation of task-specific grasp poses with appropriate positioning and orientation. Specifically, we introduce a retrieval-based affordance transfer paradigm, leveraging the implicit correlation between hand gestures and object affordances to extract grasping knowledge from large-scale human-object interaction videos. By eliminating the reliance on pre-given object priors, GAT-Grasp enables zero-shot generalization to novel objects and cluttered environments. Real-world evaluations confirm its robustness across diverse and unseen scenarios, demonstrating reliable grasp execution in complex task settings. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.06227v1-abstract-full').style.display = 'none'; document.getElementById('2503.06227v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">under review</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.03211">arXiv:2503.03211</a> <span> [<a href="https://arxiv.org/pdf/2503.03211">pdf</a>, <a href="https://arxiv.org/format/2503.03211">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> NodeReg: Mitigating the Imbalance and Distribution Shift Effects in Semi-Supervised Node Classification via Norm Consistency </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yang%2C+S">Shenzhi Yang</a>, <a href="/search/cs?searchtype=author&query=Xia%2C+J">Jun Xia</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+J">Jingbo Zhou</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xingkai Yao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xiaofang Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.03211v1-abstract-short" style="display: inline;"> Aggregating information from neighboring nodes benefits graph neural networks (GNNs) in semi-supervised node classification tasks. Nevertheless, this mechanism also renders nodes susceptible to the influence of their neighbors. For instance, this will occur when the neighboring nodes are imbalanced or the neighboring nodes contain noise, which can even affect the GNN's ability to generalize out of… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.03211v1-abstract-full').style.display = 'inline'; document.getElementById('2503.03211v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.03211v1-abstract-full" style="display: none;"> Aggregating information from neighboring nodes benefits graph neural networks (GNNs) in semi-supervised node classification tasks. Nevertheless, this mechanism also renders nodes susceptible to the influence of their neighbors. For instance, this will occur when the neighboring nodes are imbalanced or the neighboring nodes contain noise, which can even affect the GNN's ability to generalize out of distribution. We find that ensuring the consistency of the norm for node representations can significantly reduce the impact of these two issues on GNNs. To this end, we propose a regularized optimization method called NodeReg that enforces the consistency of node representation norms. This method is simple but effective and satisfies Lipschitz continuity, thus facilitating stable optimization and significantly improving semi-supervised node classification performance under the above two scenarios. To illustrate, in the imbalance scenario, when training a GCN with an imbalance ratio of 0.1, NodeReg outperforms the most competitive baselines by 1.4%-25.9% in F1 score across five public datasets. Similarly, in the distribution shift scenario, NodeReg outperforms the most competitive baseline by 1.4%-3.1% in accuracy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.03211v1-abstract-full').style.display = 'none'; document.getElementById('2503.03211v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.00813">arXiv:2503.00813</a> <span> [<a href="https://arxiv.org/pdf/2503.00813">pdf</a>, <a href="https://arxiv.org/format/2503.00813">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> HLoRA: Efficient Federated Learning System for LLM Heterogeneous Fine-Tuning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+Q">Qianli Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhaorui Zhang</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xin Yao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+B">Benben Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.00813v1-abstract-short" style="display: inline;"> Federated learning systems have been identified as an efficient approach to scaling distributed model training with a large amount of participants or data owners while guaranteeing data privacy. To apply the current most popular pre-trained large language models to other domains with data privacy guarantee requirements, existing works propose fine-tuning the pre-trained large language models in fe… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.00813v1-abstract-full').style.display = 'inline'; document.getElementById('2503.00813v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.00813v1-abstract-full" style="display: none;"> Federated learning systems have been identified as an efficient approach to scaling distributed model training with a large amount of participants or data owners while guaranteeing data privacy. To apply the current most popular pre-trained large language models to other domains with data privacy guarantee requirements, existing works propose fine-tuning the pre-trained large language models in federated learning environments across data owners using the parameter efficient fine-tuning approaches, LoRA. To address the resource and data heterogeneous issues for the participants, previous works adopted heterogeneous LoRA using different ranks for different clients and pending their rank, which brings bias for the parameter aggregation. To address this issue, we propose HLoRA, an efficient federated learning system utilizing a modified LoRA approach that incorporates rank heterogeneity to optimize communication and computational efficiency. Experimental results, conducted using the Microsoft Research Paraphrase Corpus (MRPC), Quora Question Pairs (QQP) and Recognizing Textual Entailment (RTE), within the Plato federated learning framework, demonstrate that our method not only reduces resource demands but also outperforms traditional LoRA applications in terms of convergence speed and final model accuracy. This study shows that our approach can significantly improve the practical deployment of federated LLM fine-tuning, particularly in environments with diverse client resources. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.00813v1-abstract-full').style.display = 'none'; document.getElementById('2503.00813v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.19790">arXiv:2502.19790</a> <span> [<a href="https://arxiv.org/pdf/2502.19790">pdf</a>, <a href="https://arxiv.org/format/2502.19790">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> </div> </div> <p class="title is-5 mathjax"> Mixtera: A Data Plane for Foundation Model Training </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=B%C3%B6ther%2C+M">Maximilian B枚ther</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xiaozhe Yao</a>, <a href="/search/cs?searchtype=author&query=Kerimoglu%2C+T">Tolga Kerimoglu</a>, <a href="/search/cs?searchtype=author&query=Klimovic%2C+A">Ana Klimovic</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.19790v1-abstract-short" style="display: inline;"> State-of-the-art large language and vision models are trained over trillions of tokens that are aggregated from a large variety of sources. As training data collections grow, manually managing the samples becomes time-consuming, tedious, and prone to errors. Yet recent research shows that the data mixture and the order in which samples are visited during training can significantly influence model… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.19790v1-abstract-full').style.display = 'inline'; document.getElementById('2502.19790v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.19790v1-abstract-full" style="display: none;"> State-of-the-art large language and vision models are trained over trillions of tokens that are aggregated from a large variety of sources. As training data collections grow, manually managing the samples becomes time-consuming, tedious, and prone to errors. Yet recent research shows that the data mixture and the order in which samples are visited during training can significantly influence model accuracy. We build and present Mixtera, a data plane for foundation model training that enables users to declaratively express which data samples should be used in which proportion and in which order during training. Mixtera is a centralized, read-only layer that is deployed on top of existing training data collections and can be declaratively queried. It operates independently of the filesystem structure and supports mixtures across arbitrary properties (e.g., language, source dataset) as well as dynamic adjustment of the mixture based on model feedback. We experimentally evaluate Mixtera and show that our implementation does not bottleneck training and scales to 256 GH200 superchips. We demonstrate how Mixtera supports recent advancements in mixing strategies by implementing the proposed Adaptive Data Optimization (ADO) algorithm in the system and evaluating its performance impact. We also explore the role of mixtures for vision-language models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.19790v1-abstract-full').style.display = 'none'; document.getElementById('2502.19790v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">under submission</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.16982">arXiv:2502.16982</a> <span> [<a href="https://arxiv.org/pdf/2502.16982">pdf</a>, <a href="https://arxiv.org/format/2502.16982">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Muon is Scalable for LLM Training </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jingyuan Liu</a>, <a href="/search/cs?searchtype=author&query=Su%2C+J">Jianlin Su</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xingcheng Yao</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Z">Zhejun Jiang</a>, <a href="/search/cs?searchtype=author&query=Lai%2C+G">Guokun Lai</a>, <a href="/search/cs?searchtype=author&query=Du%2C+Y">Yulun Du</a>, <a href="/search/cs?searchtype=author&query=Qin%2C+Y">Yidao Qin</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+W">Weixin Xu</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+E">Enzhe Lu</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+J">Junjie Yan</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yanru Chen</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+H">Huabin Zheng</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yibo Liu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+S">Shaowei Liu</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+B">Bohong Yin</a>, <a href="/search/cs?searchtype=author&query=He%2C+W">Weiran He</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+H">Han Zhu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yuzhi Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jianzhou Wang</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+M">Mengnan Dong</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zheng Zhang</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+Y">Yongsheng Kang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Hao Zhang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+X">Xinran Xu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yutao Zhang</a> , et al. (3 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.16982v1-abstract-short" style="display: inline;"> Recently, the Muon optimizer based on matrix orthogonalization has demonstrated strong results in training small-scale language models, but the scalability to larger models has not been proven. We identify two crucial techniques for scaling up Muon: (1) adding weight decay and (2) carefully adjusting the per-parameter update scale. These techniques allow Muon to work out-of-the-box on large-scale… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.16982v1-abstract-full').style.display = 'inline'; document.getElementById('2502.16982v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.16982v1-abstract-full" style="display: none;"> Recently, the Muon optimizer based on matrix orthogonalization has demonstrated strong results in training small-scale language models, but the scalability to larger models has not been proven. We identify two crucial techniques for scaling up Muon: (1) adding weight decay and (2) carefully adjusting the per-parameter update scale. These techniques allow Muon to work out-of-the-box on large-scale training without the need of hyper-parameter tuning. Scaling law experiments indicate that Muon achieves $\sim\!2\times$ computational efficiency compared to AdamW with compute optimal training. Based on these improvements, we introduce Moonlight, a 3B/16B-parameter Mixture-of-Expert (MoE) model trained with 5.7T tokens using Muon. Our model improves the current Pareto frontier, achieving better performance with much fewer training FLOPs compared to prior models. We open-source our distributed Muon implementation that is memory optimal and communication efficient. We also release the pretrained, instruction-tuned, and intermediate checkpoints to support future research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.16982v1-abstract-full').style.display = 'none'; document.getElementById('2502.16982v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.15613">arXiv:2502.15613</a> <span> [<a href="https://arxiv.org/pdf/2502.15613">pdf</a>, <a href="https://arxiv.org/format/2502.15613">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Pick-and-place Manipulation Across Grippers Without Retraining: A Learning-optimization Diffusion Policy Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xiangtong Yao</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Y">Yirui Zhou</a>, <a href="/search/cs?searchtype=author&query=Meng%2C+Y">Yuan Meng</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+L">Liangyu Dong</a>, <a href="/search/cs?searchtype=author&query=Hong%2C+L">Lin Hong</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zitao Zhang</a>, <a href="/search/cs?searchtype=author&query=Bing%2C+Z">Zhenshan Bing</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+K">Kai Huang</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+F">Fuchun Sun</a>, <a href="/search/cs?searchtype=author&query=Knoll%2C+A">Alois Knoll</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.15613v1-abstract-short" style="display: inline;"> Current robotic pick-and-place policies typically require consistent gripper configurations across training and inference. This constraint imposes high retraining or fine-tuning costs, especially for imitation learning-based approaches, when adapting to new end-effectors. To mitigate this issue, we present a diffusion-based policy with a hybrid learning-optimization framework, enabling zero-shot a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.15613v1-abstract-full').style.display = 'inline'; document.getElementById('2502.15613v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.15613v1-abstract-full" style="display: none;"> Current robotic pick-and-place policies typically require consistent gripper configurations across training and inference. This constraint imposes high retraining or fine-tuning costs, especially for imitation learning-based approaches, when adapting to new end-effectors. To mitigate this issue, we present a diffusion-based policy with a hybrid learning-optimization framework, enabling zero-shot adaptation to novel grippers without additional data collection for retraining policy. During training, the policy learns manipulation primitives from demonstrations collected using a base gripper. At inference, a diffusion-based optimization strategy dynamically enforces kinematic and safety constraints, ensuring that generated trajectories align with the physical properties of unseen grippers. This is achieved through a constrained denoising procedure that adapts trajectories to gripper-specific parameters (e.g., tool-center-point offsets, jaw widths) while preserving collision avoidance and task feasibility. We validate our method on a Franka Panda robot across six gripper configurations, including 3D-printed fingertips, flexible silicone gripper, and Robotiq 2F-85 gripper. Our approach achieves a 93.3% average task success rate across grippers (vs. 23.3-26.7% for diffusion policy baselines), supporting tool-center-point variations of 16-23.5 cm and jaw widths of 7.5-11.5 cm. The results demonstrate that constrained diffusion enables robust cross-gripper manipulation while maintaining the sample efficiency of imitation learning, eliminating the need for gripper-specific retraining. Video and code are available at https://github.com/yaoxt3/GADP. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.15613v1-abstract-full').style.display = 'none'; document.getElementById('2502.15613v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Video and code are available at https://github.com/yaoxt3/GADP</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12029">arXiv:2502.12029</a> <span> [<a href="https://arxiv.org/pdf/2502.12029">pdf</a>, <a href="https://arxiv.org/format/2502.12029">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> KnowPath: Knowledge-enhanced Reasoning via LLM-generated Inference Paths over Knowledge Graphs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhao%2C+Q">Qi Zhao</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+H">Hongyu Yang</a>, <a href="/search/cs?searchtype=author&query=Song%2C+Q">Qi Song</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xinwei Yao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiangyang Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12029v2-abstract-short" style="display: inline;"> Large language models (LLMs) have demonstrated remarkable capabilities in various complex tasks, yet they still suffer from hallucinations. Introducing external knowledge, such as knowledge graph, can enhance the LLMs' ability to provide factual answers. LLMs have the ability to interactively explore knowledge graphs. However, most approaches have been affected by insufficient internal knowledge e… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12029v2-abstract-full').style.display = 'inline'; document.getElementById('2502.12029v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12029v2-abstract-full" style="display: none;"> Large language models (LLMs) have demonstrated remarkable capabilities in various complex tasks, yet they still suffer from hallucinations. Introducing external knowledge, such as knowledge graph, can enhance the LLMs' ability to provide factual answers. LLMs have the ability to interactively explore knowledge graphs. However, most approaches have been affected by insufficient internal knowledge excavation in LLMs, limited generation of trustworthy knowledge reasoning paths, and a vague integration between internal and external knowledge. Therefore, we propose KnowPath, a knowledge-enhanced large model framework driven by the collaboration of internal and external knowledge. It relies on the internal knowledge of the LLM to guide the exploration of interpretable directed subgraphs in external knowledge graphs, better integrating the two knowledge sources for more accurate reasoning. Extensive experiments on multiple real-world datasets confirm the superiority of KnowPath. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12029v2-abstract-full').style.display = 'none'; document.getElementById('2502.12029v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.10330">arXiv:2502.10330</a> <span> [<a href="https://arxiv.org/pdf/2502.10330">pdf</a>, <a href="https://arxiv.org/format/2502.10330">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> DiOpt: Self-supervised Diffusion for Constrained Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ding%2C+S">Shutong Ding</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Y">Yimiao Zhou</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+K">Ke Hu</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xi Yao</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+J">Junchi Yan</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+X">Xiaoying Tang</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+Y">Ye Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.10330v1-abstract-short" style="display: inline;"> Recent advances in diffusion models show promising potential for learning-based optimization by leveraging their multimodal sampling capability to escape local optima. However, existing diffusion-based optimization approaches, often reliant on supervised training, lacks a mechanism to ensure strict constraint satisfaction which is often required in real-world applications. One resulting observatio… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10330v1-abstract-full').style.display = 'inline'; document.getElementById('2502.10330v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.10330v1-abstract-full" style="display: none;"> Recent advances in diffusion models show promising potential for learning-based optimization by leveraging their multimodal sampling capability to escape local optima. However, existing diffusion-based optimization approaches, often reliant on supervised training, lacks a mechanism to ensure strict constraint satisfaction which is often required in real-world applications. One resulting observation is the distributional misalignment, i.e. the generated solution distribution often exhibits small overlap with the feasible domain. In this paper, we propose DiOpt, a novel diffusion paradigm that systematically learns near-optimal feasible solution distributions through iterative self-training. Our framework introduces several key innovations: a target distribution specifically designed to maximize overlap with the constrained solution manifold; a bootstrapped self-training mechanism that adaptively weights candidate solutions based on the severity of constraint violations and optimality gaps; and a dynamic memory buffer that accelerates convergence by retaining high-quality solutions over training iterations. To our knowledge, DiOpt represents the first successful integration of self-supervised diffusion with hard constraint satisfaction. Evaluations on diverse tasks, including power grid control, motion retargeting, wireless allocation demonstrate its superiority in terms of both optimality and constraint satisfaction. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10330v1-abstract-full').style.display = 'none'; document.getElementById('2502.10330v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09334">arXiv:2502.09334</a> <span> [<a href="https://arxiv.org/pdf/2502.09334">pdf</a>, <a href="https://arxiv.org/format/2502.09334">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> ThunderServe: High-performance and Cost-efficient LLM Serving in Cloud Environments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jiang%2C+Y">Youhe Jiang</a>, <a href="/search/cs?searchtype=author&query=Fu%2C+F">Fangcheng Fu</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xiaozhe Yao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Taiyi Wang</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+B">Bin Cui</a>, <a href="/search/cs?searchtype=author&query=Klimovic%2C+A">Ana Klimovic</a>, <a href="/search/cs?searchtype=author&query=Yoneki%2C+E">Eiko Yoneki</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09334v1-abstract-short" style="display: inline;"> Recent developments in large language models (LLMs) have demonstrated their remarkable proficiency in a range of tasks. Compared to in-house homogeneous GPU clusters, deploying LLMs in cloud environments with diverse types of GPUs is crucial for addressing the GPU shortage problem and being more cost-effective. However, the diversity of network environments and various GPU types on the cloud bring… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09334v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09334v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09334v1-abstract-full" style="display: none;"> Recent developments in large language models (LLMs) have demonstrated their remarkable proficiency in a range of tasks. Compared to in-house homogeneous GPU clusters, deploying LLMs in cloud environments with diverse types of GPUs is crucial for addressing the GPU shortage problem and being more cost-effective. However, the diversity of network environments and various GPU types on the cloud bring difficulties to achieving high-performance serving. In this work, we propose ThunderServe, a high-performance and cost-efficient LLM serving system for heterogeneous cloud environments. We introduce a novel scheduling algorithm, which optimizes the deployment plan of LLM serving to accommodate the heterogeneous resource and network bandwidth conditions in cloud environments. Furthermore, we propose a lightweight re-scheduling mechanism, designed to adapt to fluctuating online conditions (e.g., node failures, workload shifts) without the need for costly restarts of ongoing services. Empirical results in both heterogeneous cloud and homogeneous in-house environments reveal that ThunderServe delivers up to a 2.1$\times$ and on average a $1.7\times$ increase in throughput and achieves up to a 2.5$\times$ and on average a $1.5\times$ reduction in latency deadlines compared with state-of-the-art systems given the same price budget, suggesting opting for cloud services provides a more cost-efficient solution. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09334v1-abstract-full').style.display = 'none'; document.getElementById('2502.09334v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">MLSys 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07302">arXiv:2502.07302</a> <span> [<a href="https://arxiv.org/pdf/2502.07302">pdf</a>, <a href="https://arxiv.org/format/2502.07302">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> CASC-AI: Consensus-aware Self-corrective Learning for Noise Cell Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Deng%2C+R">Ruining Deng</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yihe Yang</a>, <a href="/search/cs?searchtype=author&query=Pisapia%2C+D+J">David J. Pisapia</a>, <a href="/search/cs?searchtype=author&query=Liechty%2C+B">Benjamin Liechty</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+J">Junchao Zhu</a>, <a href="/search/cs?searchtype=author&query=Xiong%2C+J">Juming Xiong</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+J">Junlin Guo</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+Z">Zhengyi Lu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jiacheng Wang</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xing Yao</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+R">Runxuan Yu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Rendong Zhang</a>, <a href="/search/cs?searchtype=author&query=Rudravaram%2C+G">Gaurav Rudravaram</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+M">Mengmeng Yin</a>, <a href="/search/cs?searchtype=author&query=Sarder%2C+P">Pinaki Sarder</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+H">Haichun Yang</a>, <a href="/search/cs?searchtype=author&query=Huo%2C+Y">Yuankai Huo</a>, <a href="/search/cs?searchtype=author&query=Sabuncu%2C+M+R">Mert R. Sabuncu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07302v2-abstract-short" style="display: inline;"> Multi-class cell segmentation in high-resolution gigapixel whole slide images (WSIs) is crucial for various clinical applications. However, training such models typically requires labor-intensive, pixel-wise annotations by domain experts. Recent efforts have democratized this process by involving lay annotators without medical expertise. However, conventional non-corrective approaches struggle to… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07302v2-abstract-full').style.display = 'inline'; document.getElementById('2502.07302v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07302v2-abstract-full" style="display: none;"> Multi-class cell segmentation in high-resolution gigapixel whole slide images (WSIs) is crucial for various clinical applications. However, training such models typically requires labor-intensive, pixel-wise annotations by domain experts. Recent efforts have democratized this process by involving lay annotators without medical expertise. However, conventional non-corrective approaches struggle to handle annotation noise adaptively because they lack mechanisms to mitigate false positives (FP) and false negatives (FN) at both the image-feature and pixel levels. In this paper, we propose a consensus-aware self-corrective AI agent that leverages the Consensus Matrix to guide its learning process. The Consensus Matrix defines regions where both the AI and annotators agree on cell and non-cell annotations, which are prioritized with stronger supervision. Conversely, areas of disagreement are adaptively weighted based on their feature similarity to high-confidence consensus regions, with more similar regions receiving greater attention. Additionally, contrastive learning is employed to separate features of noisy regions from those of reliable consensus regions by maximizing their dissimilarity. This paradigm enables the model to iteratively refine noisy labels, enhancing its robustness. Validated on one real-world lay-annotated cell dataset and two reasoning-guided simulated noisy datasets, our method demonstrates improved segmentation performance, effectively correcting FP and FN errors and showcasing its potential for training robust models on noisy datasets. The official implementation and cell annotations are publicly available at https://github.com/ddrrnn123/CASC-AI. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07302v2-abstract-full').style.display = 'none'; document.getElementById('2502.07302v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04667">arXiv:2502.04667</a> <span> [<a href="https://arxiv.org/pdf/2502.04667">pdf</a>, <a href="https://arxiv.org/format/2502.04667">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Unveiling the Mechanisms of Explicit CoT Training: How Chain-of-Thought Enhances Reasoning Generalization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xinhao Yao</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+R">Ruifeng Ren</a>, <a href="/search/cs?searchtype=author&query=Liao%2C+Y">Yun Liao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yong Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04667v1-abstract-short" style="display: inline;"> Training large language models (LLMs) with high-quality Chain-of-Thought (CoT) annotations has become a widely adopted strategy due to its significant enhancement of reasoning capabilities. To fully comprehend this approach, two questions naturally arise: (Q1) What advantages does training with CoT offer compared to training without CoT? (Q2) If there are advantages, what are the underlying mechan… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04667v1-abstract-full').style.display = 'inline'; document.getElementById('2502.04667v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04667v1-abstract-full" style="display: none;"> Training large language models (LLMs) with high-quality Chain-of-Thought (CoT) annotations has become a widely adopted strategy due to its significant enhancement of reasoning capabilities. To fully comprehend this approach, two questions naturally arise: (Q1) What advantages does training with CoT offer compared to training without CoT? (Q2) If there are advantages, what are the underlying mechanisms of explicit CoT training? Analyzing the advantages and mechanisms of CoT training is challenging due to the many factors involved. To address this, we conduct a detailed analysis using clear and controllable data distributions and, for the first time, reveal that CoT training offers the following advantages: (1) Training with CoT markedly improves reasoning generalization, extending it from in-distribution (ID) to both ID and out-of-distribution (OOD) scenarios, while also speeding up convergence; (2) Even when training with CoT includes a certain range of erroneous reasoning steps, it still enables the model to learn reasoning patterns, leading to systematic generalization. We further explore the underlying mechanisms from a circuit perspective: (1) The data distribution (e.g., ratio $位$ and pattern) plays a crucial role in influencing the model's systematic generalization; (2) CoT training (with two-hop facts) internalizes reasoning into a two-stage generalizing circuit, where the number of stages corresponds to the explicit reasoning steps during training. Our findings elucidate the mechanisms underlying explicit CoT training and offer critical insights into tuning strategies for LLMs to achieve robust generalization. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04667v1-abstract-full').style.display = 'none'; document.getElementById('2502.04667v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02584">arXiv:2502.02584</a> <span> [<a href="https://arxiv.org/pdf/2502.02584">pdf</a>, <a href="https://arxiv.org/format/2502.02584">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> QLASS: Boosting Language Agent Inference via Q-Guided Stepwise Search </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lin%2C+Z">Zongyu Lin</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+Y">Yao Tang</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xingcheng Yao</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+D">Da Yin</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+Z">Ziniu Hu</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Y">Yizhou Sun</a>, <a href="/search/cs?searchtype=author&query=Chang%2C+K">Kai-Wei Chang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.02584v1-abstract-short" style="display: inline;"> Language agents have become a promising solution to complex interactive tasks. One of the key ingredients to the success of language agents is the reward model on the trajectory of the agentic workflow, which provides valuable guidance during training or inference. However, due to the lack of annotations of intermediate interactions, most existing works use an outcome reward model to optimize poli… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02584v1-abstract-full').style.display = 'inline'; document.getElementById('2502.02584v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.02584v1-abstract-full" style="display: none;"> Language agents have become a promising solution to complex interactive tasks. One of the key ingredients to the success of language agents is the reward model on the trajectory of the agentic workflow, which provides valuable guidance during training or inference. However, due to the lack of annotations of intermediate interactions, most existing works use an outcome reward model to optimize policies across entire trajectories. This may lead to sub-optimal policies and hinder the overall performance. To address this, we propose QLASS (Q-guided Language Agent Stepwise Search), to automatically generate annotations by estimating Q-values in a stepwise manner for open language agents. By introducing a reasoning tree and performing process reward modeling, QLASS provides effective intermediate guidance for each step. With the stepwise guidance, we propose a Q-guided generation strategy to enable language agents to better adapt to long-term value, resulting in significant performance improvement during model inference on complex interactive agent tasks. Notably, even with almost half the annotated data, QLASS retains strong performance, demonstrating its efficiency in handling limited supervision. We also empirically demonstrate that QLASS can lead to more effective decision making through qualitative analysis. We will release our code and data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02584v1-abstract-full').style.display = 'none'; document.getElementById('2502.02584v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.01971">arXiv:2502.01971</a> <span> [<a href="https://arxiv.org/pdf/2502.01971">pdf</a>, <a href="https://arxiv.org/ps/2502.01971">ps</a>, <a href="https://arxiv.org/format/2502.01971">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> </div> </div> <p class="title is-5 mathjax"> Bottom-Up Reputation Promotes Cooperation with Multi-Agent Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ren%2C+T">Tianyu Ren</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xuan Yao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yang Li</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+X">Xiao-Jun Zeng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.01971v1-abstract-short" style="display: inline;"> Reputation serves as a powerful mechanism for promoting cooperation in multi-agent systems, as agents are more inclined to cooperate with those of good social standing. While existing multi-agent reinforcement learning methods typically rely on predefined social norms to assign reputations, the question of how a population reaches a consensus on judgement when agents hold private, independent view… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01971v1-abstract-full').style.display = 'inline'; document.getElementById('2502.01971v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.01971v1-abstract-full" style="display: none;"> Reputation serves as a powerful mechanism for promoting cooperation in multi-agent systems, as agents are more inclined to cooperate with those of good social standing. While existing multi-agent reinforcement learning methods typically rely on predefined social norms to assign reputations, the question of how a population reaches a consensus on judgement when agents hold private, independent views remains unresolved. In this paper, we propose a novel bottom-up reputation learning method, Learning with Reputation Reward (LR2), designed to promote cooperative behaviour through rewards shaping based on assigned reputation. Our agent architecture includes a dilemma policy that determines cooperation by considering the impact on neighbours, and an evaluation policy that assigns reputations to affect the actions of neighbours while optimizing self-objectives. It operates using local observations and interaction-based rewards, without relying on centralized modules or predefined norms. Our findings demonstrate the effectiveness and adaptability of LR2 across various spatial social dilemma scenarios. Interestingly, we find that LR2 stabilizes and enhances cooperation not only with reward reshaping from bottom-up reputation but also by fostering strategy clustering in structured populations, thereby creating environments conducive to sustained cooperation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01971v1-abstract-full').style.display = 'none'; document.getElementById('2502.01971v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by AAMAS 2025 (24th International Conference on Autonomous Agents and Multiagent Systems)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00722">arXiv:2502.00722</a> <span> [<a href="https://arxiv.org/pdf/2502.00722">pdf</a>, <a href="https://arxiv.org/format/2502.00722">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> Demystifying Cost-Efficiency in LLM Serving over Heterogeneous GPUs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jiang%2C+Y">Youhe Jiang</a>, <a href="/search/cs?searchtype=author&query=Fu%2C+F">Fangcheng Fu</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xiaozhe Yao</a>, <a href="/search/cs?searchtype=author&query=He%2C+G">Guoliang He</a>, <a href="/search/cs?searchtype=author&query=Miao%2C+X">Xupeng Miao</a>, <a href="/search/cs?searchtype=author&query=Klimovic%2C+A">Ana Klimovic</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+B">Bin Cui</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+B">Binhang Yuan</a>, <a href="/search/cs?searchtype=author&query=Yoneki%2C+E">Eiko Yoneki</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00722v1-abstract-short" style="display: inline;"> Recent advancements in Large Language Models (LLMs) have led to increasingly diverse requests, accompanied with varying resource (compute and memory) demands to serve them. However, this in turn degrades the cost-efficiency of LLM serving as common practices primarily rely on homogeneous GPU resources. In response to this problem, this work conducts a thorough study about serving LLMs over heterog… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00722v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00722v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00722v1-abstract-full" style="display: none;"> Recent advancements in Large Language Models (LLMs) have led to increasingly diverse requests, accompanied with varying resource (compute and memory) demands to serve them. However, this in turn degrades the cost-efficiency of LLM serving as common practices primarily rely on homogeneous GPU resources. In response to this problem, this work conducts a thorough study about serving LLMs over heterogeneous GPU resources on cloud platforms. The rationale is that different GPU types exhibit distinct compute and memory characteristics, aligning well with the divergent resource demands of diverse requests. Particularly, through comprehensive benchmarking, we discover that the cost-efficiency of LLM serving can be substantially optimized by meticulously determining GPU composition, deployment configurations, and workload assignments. Subsequently, we design a scheduling algorithm via mixed-integer linear programming, aiming at deducing the most cost-efficient serving plan under the constraints of price budget and real-time GPU availability. Remarkably, our approach effectively outperforms homogeneous and heterogeneous baselines under a wide array of scenarios, covering diverse workload traces, varying GPU availablilities, and multi-model serving. This casts new light on more accessible and efficient LLM serving over heterogeneous cloud resources. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00722v1-abstract-full').style.display = 'none'; document.getElementById('2502.00722v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.06753">arXiv:2501.06753</a> <span> [<a href="https://arxiv.org/pdf/2501.06753">pdf</a>, <a href="https://arxiv.org/format/2501.06753">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Procedural Fairness and Its Relationship with Distributive Fairness in Machine Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Ziming Wang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+C">Changwu Huang</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+K">Ke Tang</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xin Yao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.06753v1-abstract-short" style="display: inline;"> Fairness in machine learning (ML) has garnered significant attention in recent years. While existing research has predominantly focused on the distributive fairness of ML models, there has been limited exploration of procedural fairness. This paper proposes a novel method to achieve procedural fairness during the model training phase. The effectiveness of the proposed method is validated through e… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.06753v1-abstract-full').style.display = 'inline'; document.getElementById('2501.06753v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.06753v1-abstract-full" style="display: none;"> Fairness in machine learning (ML) has garnered significant attention in recent years. While existing research has predominantly focused on the distributive fairness of ML models, there has been limited exploration of procedural fairness. This paper proposes a novel method to achieve procedural fairness during the model training phase. The effectiveness of the proposed method is validated through experiments conducted on one synthetic and six real-world datasets. Additionally, this work studies the relationship between procedural fairness and distributive fairness in ML models. On one hand, the impact of dataset bias and the procedural fairness of ML model on its distributive fairness is examined. The results highlight a significant influence of both dataset bias and procedural fairness on distributive fairness. On the other hand, the distinctions between optimizing procedural and distributive fairness metrics are analyzed. Experimental results demonstrate that optimizing procedural fairness metrics mitigates biases introduced or amplified by the decision-making process, thereby ensuring fairness in the decision-making process itself, as well as improving distributive fairness. In contrast, optimizing distributive fairness metrics encourages the ML model's decision-making process to favor disadvantaged groups, counterbalancing the inherent preferences for advantaged groups present in the dataset and ultimately achieving distributive fairness. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.06753v1-abstract-full').style.display = 'none'; document.getElementById('2501.06753v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">33 pages, 11 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.02506">arXiv:2501.02506</a> <span> [<a href="https://arxiv.org/pdf/2501.02506">pdf</a>, <a href="https://arxiv.org/format/2501.02506">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> ToolHop: A Query-Driven Benchmark for Evaluating Large Language Models in Multi-Hop Tool Use </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ye%2C+J">Junjie Ye</a>, <a href="/search/cs?searchtype=author&query=Du%2C+Z">Zhengyin Du</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xuesong Yao</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+W">Weijian Lin</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Y">Yufei Xu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zehui Chen</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zaiyuan Wang</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+S">Sining Zhu</a>, <a href="/search/cs?searchtype=author&query=Xi%2C+Z">Zhiheng Xi</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+S">Siyu Yuan</a>, <a href="/search/cs?searchtype=author&query=Gui%2C+T">Tao Gui</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Q">Qi Zhang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+X">Xuanjing Huang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jiecao Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.02506v2-abstract-short" style="display: inline;"> Effective evaluation of multi-hop tool use is critical for analyzing the understanding, reasoning, and function-calling capabilities of large language models (LLMs). However, progress has been hindered by a lack of reliable evaluation datasets. To address this, we present ToolHop, a dataset comprising 995 user queries and 3,912 associated tools, specifically designed for rigorous evaluation of mul… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02506v2-abstract-full').style.display = 'inline'; document.getElementById('2501.02506v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.02506v2-abstract-full" style="display: none;"> Effective evaluation of multi-hop tool use is critical for analyzing the understanding, reasoning, and function-calling capabilities of large language models (LLMs). However, progress has been hindered by a lack of reliable evaluation datasets. To address this, we present ToolHop, a dataset comprising 995 user queries and 3,912 associated tools, specifically designed for rigorous evaluation of multi-hop tool use. ToolHop ensures diverse queries, meaningful interdependencies, locally executable tools, detailed feedback, and verifiable answers through a novel query-driven data construction approach that includes tool creation, document refinement, and code generation. We evaluate 14 LLMs across five model families (i.e., LLaMA3.1, Qwen2.5, Gemini1.5, Claude3.5, and GPT), uncovering significant challenges in handling multi-hop tool-use scenarios. The leading model, GPT-4o, achieves an accuracy of 49.04%, underscoring substantial room for improvement. Further analysis reveals variations in tool-use strategies for various families, offering actionable insights to guide the development of more effective approaches. Code and data can be found in https://huggingface.co/datasets/bytedance-research/ToolHop. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02506v2-abstract-full').style.display = 'none'; document.getElementById('2501.02506v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.02177">arXiv:2501.02177</a> <span> [<a href="https://arxiv.org/pdf/2501.02177">pdf</a>, <a href="https://arxiv.org/format/2501.02177">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> IMUFace: Real-Time, Low-Power, Continuous 3D Facial Reconstruction Through Earphones </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xianrong Yao</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+C">Chengzhang Yu</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+L">Lingde Hu</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+Y">Yincheng Jin</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+Y">Yang Gao</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+Z">Zhanpeng Jin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.02177v1-abstract-short" style="display: inline;"> The potential of facial expression reconstruction technology is significant, with applications in various fields such as human-computer interaction, affective computing, and virtual reality. Recent studies have proposed using ear-worn devices for facial expression reconstruction to address the environmental limitations and privacy concerns associated with traditional camera-based methods. However,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02177v1-abstract-full').style.display = 'inline'; document.getElementById('2501.02177v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.02177v1-abstract-full" style="display: none;"> The potential of facial expression reconstruction technology is significant, with applications in various fields such as human-computer interaction, affective computing, and virtual reality. Recent studies have proposed using ear-worn devices for facial expression reconstruction to address the environmental limitations and privacy concerns associated with traditional camera-based methods. However, these approaches still require improvements in terms of aesthetics and power consumption. This paper introduces a system called IMUFace. It uses inertial measurement units (IMUs) embedded in wireless earphones to detect subtle ear movements caused by facial muscle activities, allowing for covert and low-power facial reconstruction. A user study involving 12 participants was conducted, and a deep learning model named IMUTwinTrans was proposed. The results show that IMUFace can accurately predict users' facial landmarks with a precision of 2.21 mm, using only five minutes of training data. The predicted landmarks can be utilized to reconstruct a three-dimensional facial model. IMUFace operates at a sampling rate of 30 Hz with a relatively low power consumption of 58 mW. The findings presented in this study demonstrate the real-world applicability of IMUFace and highlight potential directions for further research to facilitate its practical adoption. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02177v1-abstract-full').style.display = 'none'; document.getElementById('2501.02177v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.06820">arXiv:2412.06820</a> <span> [<a href="https://arxiv.org/pdf/2412.06820">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.neucom.2024.129053">10.1016/j.neucom.2024.129053 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Artificial Intelligence without Restriction Surpassing Human Intelligence with Probability One: Theoretical Insight into Secrets of the Brain with AI Twins of the Brain </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+G">Guang-Bin Huang</a>, <a href="/search/cs?searchtype=author&query=Westover%2C+M+B">M. Brandon Westover</a>, <a href="/search/cs?searchtype=author&query=Tan%2C+E">Eng-King Tan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Haibo Wang</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+D">Dongshun Cui</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+W">Wei-Ying Ma</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tiantong Wang</a>, <a href="/search/cs?searchtype=author&query=He%2C+Q">Qi He</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+H">Haikun Wei</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+N">Ning Wang</a>, <a href="/search/cs?searchtype=author&query=Tian%2C+Q">Qiyuan Tian</a>, <a href="/search/cs?searchtype=author&query=Lam%2C+K">Kwok-Yan Lam</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xin Yao</a>, <a href="/search/cs?searchtype=author&query=Wong%2C+T+Y">Tien Yin Wong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.06820v1-abstract-short" style="display: inline;"> Artificial Intelligence (AI) has apparently become one of the most important techniques discovered by humans in history while the human brain is widely recognized as one of the most complex systems in the universe. One fundamental critical question which would affect human sustainability remains open: Will artificial intelligence (AI) evolve to surpass human intelligence in the future? This paper… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.06820v1-abstract-full').style.display = 'inline'; document.getElementById('2412.06820v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.06820v1-abstract-full" style="display: none;"> Artificial Intelligence (AI) has apparently become one of the most important techniques discovered by humans in history while the human brain is widely recognized as one of the most complex systems in the universe. One fundamental critical question which would affect human sustainability remains open: Will artificial intelligence (AI) evolve to surpass human intelligence in the future? This paper shows that in theory new AI twins with fresh cellular level of AI techniques for neuroscience could approximate the brain and its functioning systems (e.g. perception and cognition functions) with any expected small error and AI without restrictions could surpass human intelligence with probability one in the end. This paper indirectly proves the validity of the conjecture made by Frank Rosenblatt 70 years ago about the potential capabilities of AI, especially in the realm of artificial neural networks. Intelligence is just one of fortuitous but sophisticated creations of the nature which has not been fully discovered. Like mathematics and physics, with no restrictions artificial intelligence would lead to a new subject with its self-contained systems and principles. We anticipate that this paper opens new doors for 1) AI twins and other AI techniques to be used in cellular level of efficient neuroscience dynamic analysis, functioning analysis of the brain and brain illness solutions; 2) new worldwide collaborative scheme for interdisciplinary teams concurrently working on and modelling different types of neurons and synapses and different level of functioning subsystems of the brain with AI techniques; 3) development of low energy of AI techniques with the aid of fundamental neuroscience properties; and 4) new controllable, explainable and safe AI techniques with reasoning capabilities of discovering principles in nature. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.06820v1-abstract-full').style.display = 'none'; document.getElementById('2412.06820v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by journal Neurocomputing</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.06262">arXiv:2412.06262</a> <span> [<a href="https://arxiv.org/pdf/2412.06262">pdf</a>, <a href="https://arxiv.org/format/2412.06262">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> A Lightweight U-like Network Utilizing Neural Memory Ordinary Differential Equations for Slimming the Decoder </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=He%2C+Q">Quansong He</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xiaojun Yao</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+J">Jun Wu</a>, <a href="/search/cs?searchtype=author&query=Yi%2C+Z">Zhang Yi</a>, <a href="/search/cs?searchtype=author&query=He%2C+T">Tao He</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.06262v1-abstract-short" style="display: inline;"> In recent years, advanced U-like networks have demonstrated remarkable performance in medical image segmentation tasks. However, their drawbacks, including excessive parameters, high computational complexity, and slow inference speed, pose challenges for practical implementation in scenarios with limited computational resources. Existing lightweight U-like networks have alleviated some of these pr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.06262v1-abstract-full').style.display = 'inline'; document.getElementById('2412.06262v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.06262v1-abstract-full" style="display: none;"> In recent years, advanced U-like networks have demonstrated remarkable performance in medical image segmentation tasks. However, their drawbacks, including excessive parameters, high computational complexity, and slow inference speed, pose challenges for practical implementation in scenarios with limited computational resources. Existing lightweight U-like networks have alleviated some of these problems, but they often have pre-designed structures and consist of inseparable modules, limiting their application scenarios. In this paper, we propose three plug-and-play decoders by employing different discretization methods of the neural memory Ordinary Differential Equations (nmODEs). These decoders integrate features at various levels of abstraction by processing information from skip connections and performing numerical operations on upward path. Through experiments on the PH2, ISIC2017, and ISIC2018 datasets, we embed these decoders into different U-like networks, demonstrating their effectiveness in significantly reducing the number of parameters and FLOPs while maintaining performance. In summary, the proposed discretized nmODEs decoders are capable of reducing the number of parameters by about 20% ~ 50% and FLOPs by up to 74%, while possessing the potential to adapt to all U-like networks. Our code is available at https://github.com/nayutayuki/Lightweight-nmODE-Decoders-For-U-like-networks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.06262v1-abstract-full').style.display = 'none'; document.getElementById('2412.06262v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.03136">arXiv:2412.03136</a> <span> [<a href="https://arxiv.org/pdf/2412.03136">pdf</a>, <a href="https://arxiv.org/format/2412.03136">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/IROS58592.2024.10802357">10.1109/IROS58592.2024.10802357 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Asynchronous Event-Inertial Odometry using a Unified Gaussian Process Regression Framework </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+X">Xudong Li</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zhixiang Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zihao Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yizhai Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+F">Fan Zhang</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xiuming Yao</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+P">Panfeng Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.03136v1-abstract-short" style="display: inline;"> Recent works have combined monocular event camera and inertial measurement unit to estimate the $SE(3)$ trajectory. However, the asynchronicity of event cameras brings a great challenge to conventional fusion algorithms. In this paper, we present an asynchronous event-inertial odometry under a unified Gaussian Process (GP) regression framework to naturally fuse asynchronous data associations and i… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.03136v1-abstract-full').style.display = 'inline'; document.getElementById('2412.03136v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.03136v1-abstract-full" style="display: none;"> Recent works have combined monocular event camera and inertial measurement unit to estimate the $SE(3)$ trajectory. However, the asynchronicity of event cameras brings a great challenge to conventional fusion algorithms. In this paper, we present an asynchronous event-inertial odometry under a unified Gaussian Process (GP) regression framework to naturally fuse asynchronous data associations and inertial measurements. A GP latent variable model is leveraged to build data-driven motion prior and acquire the analytical integration capacity. Then, asynchronous event-based feature associations and integral pseudo measurements are tightly coupled using the same GP framework. Subsequently, this fusion estimation problem is solved by underlying factor graph in a sliding-window manner. With consideration of sparsity, those historical states are marginalized orderly. A twin system is also designed for comparison, where the traditional inertial preintegration scheme is embedded in the GP-based framework to replace the GP latent variable model. Evaluations on public event-inertial datasets demonstrate the validity of both systems. Comparison experiments show competitive precision compared to the state-of-the-art synchronous scheme. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.03136v1-abstract-full').style.display = 'none'; document.getElementById('2412.03136v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at IEEE IROS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.14798">arXiv:2411.14798</a> <span> [<a href="https://arxiv.org/pdf/2411.14798">pdf</a>, <a href="https://arxiv.org/format/2411.14798">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Facial Features Matter: a Dynamic Watermark based Proactive Deepfake Detection Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lan%2C+S">Shulin Lan</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+K">Kanlin Liu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Y">Yazhou Zhao</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+C">Chen Yang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yingchao Wang</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xingshan Yao</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+L">Liehuang Zhu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.14798v1-abstract-short" style="display: inline;"> Current passive deepfake face-swapping detection methods encounter significance bottlenecks in model generalization capabilities. Meanwhile, proactive detection methods often use fixed watermarks which lack a close relationship with the content they protect and are vulnerable to security risks. Dynamic watermarks based on facial features offer a promising solution, as these features provide unique… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14798v1-abstract-full').style.display = 'inline'; document.getElementById('2411.14798v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.14798v1-abstract-full" style="display: none;"> Current passive deepfake face-swapping detection methods encounter significance bottlenecks in model generalization capabilities. Meanwhile, proactive detection methods often use fixed watermarks which lack a close relationship with the content they protect and are vulnerable to security risks. Dynamic watermarks based on facial features offer a promising solution, as these features provide unique identifiers. Therefore, this paper proposes a Facial Feature-based Proactive deepfake detection method (FaceProtect), which utilizes changes in facial characteristics during deepfake manipulation as a novel detection mechanism. We introduce a GAN-based One-way Dynamic Watermark Generating Mechanism (GODWGM) that uses 128-dimensional facial feature vectors as inputs. This method creates irreversible mappings from facial features to watermarks, enhancing protection against various reverse inference attacks. Additionally, we propose a Watermark-based Verification Strategy (WVS) that combines steganography with GODWGM, allowing simultaneous transmission of the benchmark watermark representing facial features within the image. Experimental results demonstrate that our proposed method maintains exceptional detection performance and exhibits high practicality on images altered by various deepfake techniques. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14798v1-abstract-full').style.display = 'none'; document.getElementById('2411.14798v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.12372">arXiv:2411.12372</a> <span> [<a href="https://arxiv.org/pdf/2411.12372">pdf</a>, <a href="https://arxiv.org/format/2411.12372">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> RedPajama: an Open Dataset for Training Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Weber%2C+M">Maurice Weber</a>, <a href="/search/cs?searchtype=author&query=Fu%2C+D">Daniel Fu</a>, <a href="/search/cs?searchtype=author&query=Anthony%2C+Q">Quentin Anthony</a>, <a href="/search/cs?searchtype=author&query=Oren%2C+Y">Yonatan Oren</a>, <a href="/search/cs?searchtype=author&query=Adams%2C+S">Shane Adams</a>, <a href="/search/cs?searchtype=author&query=Alexandrov%2C+A">Anton Alexandrov</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+X">Xiaozhong Lyu</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+H">Huu Nguyen</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xiaozhe Yao</a>, <a href="/search/cs?searchtype=author&query=Adams%2C+V">Virginia Adams</a>, <a href="/search/cs?searchtype=author&query=Athiwaratkun%2C+B">Ben Athiwaratkun</a>, <a href="/search/cs?searchtype=author&query=Chalamala%2C+R">Rahul Chalamala</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Kezhen Chen</a>, <a href="/search/cs?searchtype=author&query=Ryabinin%2C+M">Max Ryabinin</a>, <a href="/search/cs?searchtype=author&query=Dao%2C+T">Tri Dao</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+P">Percy Liang</a>, <a href="/search/cs?searchtype=author&query=R%C3%A9%2C+C">Christopher R茅</a>, <a href="/search/cs?searchtype=author&query=Rish%2C+I">Irina Rish</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+C">Ce Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.12372v1-abstract-short" style="display: inline;"> Large language models are increasingly becoming a cornerstone technology in artificial intelligence, the sciences, and society as a whole, yet the optimal strategies for dataset composition and filtering remain largely elusive. Many of the top-performing models lack transparency in their dataset curation and model development processes, posing an obstacle to the development of fully open language… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12372v1-abstract-full').style.display = 'inline'; document.getElementById('2411.12372v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.12372v1-abstract-full" style="display: none;"> Large language models are increasingly becoming a cornerstone technology in artificial intelligence, the sciences, and society as a whole, yet the optimal strategies for dataset composition and filtering remain largely elusive. Many of the top-performing models lack transparency in their dataset curation and model development processes, posing an obstacle to the development of fully open language models. In this paper, we identify three core data-related challenges that must be addressed to advance open-source language models. These include (1) transparency in model development, including the data curation process, (2) access to large quantities of high-quality data, and (3) availability of artifacts and metadata for dataset curation and analysis. To address these challenges, we release RedPajama-V1, an open reproduction of the LLaMA training dataset. In addition, we release RedPajama-V2, a massive web-only dataset consisting of raw, unfiltered text data together with quality signals and metadata. Together, the RedPajama datasets comprise over 100 trillion tokens spanning multiple domains and with their quality signals facilitate the filtering of data, aiming to inspire the development of numerous new datasets. To date, these datasets have already been used in the training of strong language models used in production, such as Snowflake Arctic, Salesforce's XGen and AI2's OLMo. To provide insight into the quality of RedPajama, we present a series of analyses and ablation studies with decoder-only language models with up to 1.6B parameters. Our findings demonstrate how quality signals for web data can be effectively leveraged to curate high-quality subsets of the dataset, underscoring the potential of RedPajama to advance the development of transparent and high-performing language models at scale. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12372v1-abstract-full').style.display = 'none'; document.getElementById('2411.12372v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">38th Conference on Neural Information Processing Systems (NeurIPS 2024) Track on Datasets and Benchmarks</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.12179">arXiv:2411.12179</a> <span> [<a href="https://arxiv.org/pdf/2411.12179">pdf</a>, <a href="https://arxiv.org/format/2411.12179">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> </div> </div> <p class="title is-5 mathjax"> Multi-Grained Preference Enhanced Transformer for Multi-Behavior Sequential Recommendation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=He%2C+C">Chuan He</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yongchao Liu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Q">Qiang Li</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+W">Weiqiang Wang</a>, <a href="/search/cs?searchtype=author&query=Fu%2C+X">Xin Fu</a>, <a href="/search/cs?searchtype=author&query=Fu%2C+X">Xinyi Fu</a>, <a href="/search/cs?searchtype=author&query=Hong%2C+C">Chuntao Hong</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xinwei Yao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.12179v2-abstract-short" style="display: inline;"> Sequential recommendation (SR) aims to predict the next purchasing item according to users' dynamic preference learned from their historical user-item interactions. To improve the performance of recommendation, learning dynamic heterogeneous cross-type behavior dependencies is indispensable for recommender system. However, there still exists some challenges in Multi-Behavior Sequential Recommendat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12179v2-abstract-full').style.display = 'inline'; document.getElementById('2411.12179v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.12179v2-abstract-full" style="display: none;"> Sequential recommendation (SR) aims to predict the next purchasing item according to users' dynamic preference learned from their historical user-item interactions. To improve the performance of recommendation, learning dynamic heterogeneous cross-type behavior dependencies is indispensable for recommender system. However, there still exists some challenges in Multi-Behavior Sequential Recommendation (MBSR). On the one hand, existing methods only model heterogeneous multi-behavior dependencies at behavior-level or item-level, and modelling interaction-level dependencies is still a challenge. On the other hand, the dynamic multi-grained behavior-aware preference is hard to capture in interaction sequences, which reflects interaction-aware sequential pattern. To tackle these challenges, we propose a Multi-Grained Preference enhanced Transformer framework (M-GPT). First, M-GPT constructs a interaction-level graph of historical cross-typed interactions in a sequence. Then graph convolution is performed to derive interaction-level multi-behavior dependency representation repeatedly, in which the complex correlation between historical cross-typed interactions at specific orders can be well learned. Secondly, a novel multi-scale transformer architecture equipped with multi-grained user preference extraction is proposed to encode the interaction-aware sequential pattern enhanced by capturing temporal behavior-aware multi-grained preference . Experiments on the real-world datasets indicate that our method M-GPT consistently outperforms various state-of-the-art recommendation methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.12179v2-abstract-full').style.display = 'none'; document.getElementById('2411.12179v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.08703">arXiv:2411.08703</a> <span> [<a href="https://arxiv.org/pdf/2411.08703">pdf</a>, <a href="https://arxiv.org/format/2411.08703">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> MVKTrans: Multi-View Knowledge Transfer for Robust Multiomics Classification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cong%2C+S">Shan Cong</a>, <a href="/search/cs?searchtype=author&query=Sang%2C+Z">Zhiling Sang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+H">Hongwei Liu</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+H">Haoran Luo</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xin Wang</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+H">Hong Liang</a>, <a href="/search/cs?searchtype=author&query=Hao%2C+J">Jie Hao</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xiaohui Yao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.08703v1-abstract-short" style="display: inline;"> The distinct characteristics of multiomics data, including complex interactions within and across biological layers and disease heterogeneity (e.g., heterogeneity in etiology and clinical symptoms), drive us to develop novel designs to address unique challenges in multiomics prediction. In this paper, we propose the multi-view knowledge transfer learning (MVKTrans) framework, which transfers intra… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08703v1-abstract-full').style.display = 'inline'; document.getElementById('2411.08703v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.08703v1-abstract-full" style="display: none;"> The distinct characteristics of multiomics data, including complex interactions within and across biological layers and disease heterogeneity (e.g., heterogeneity in etiology and clinical symptoms), drive us to develop novel designs to address unique challenges in multiomics prediction. In this paper, we propose the multi-view knowledge transfer learning (MVKTrans) framework, which transfers intra- and inter-omics knowledge in an adaptive manner by reviewing data heterogeneity and suppressing bias transfer, thereby enhancing classification performance. Specifically, we design a graph contrastive module that is trained on unlabeled data to effectively learn and transfer the underlying intra-omics patterns to the supervised task. This unsupervised pretraining promotes learning general and unbiased representations for each modality, regardless of the downstream tasks. In light of the varying discriminative capacities of modalities across different diseases and/or samples, we introduce an adaptive and bi-directional cross-omics distillation module. This module automatically identifies richer modalities and facilitates dynamic knowledge transfer from more informative to less informative omics, thereby enabling a more robust and generalized integration. Extensive experiments on four real biomedical datasets demonstrate the superior performance and robustness of MVKTrans compared to the state-of-the-art. Code and data are available at https://github.com/Yaolab-fantastic/MVKTrans. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.08703v1-abstract-full').style.display = 'none'; document.getElementById('2411.08703v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06750">arXiv:2411.06750</a> <span> [<a href="https://arxiv.org/pdf/2411.06750">pdf</a>, <a href="https://arxiv.org/format/2411.06750">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> SynStitch: a Self-Supervised Learning Network for Ultrasound Image Stitching Using Synthetic Training Pairs and Indirect Supervision </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xing Yao</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+R">Runxuan Yu</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+D">Dewei Hu</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+H">Hao Yang</a>, <a href="/search/cs?searchtype=author&query=Lou%2C+A">Ange Lou</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jiacheng Wang</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+D">Daiwei Lu</a>, <a href="/search/cs?searchtype=author&query=Arenas%2C+G">Gabriel Arenas</a>, <a href="/search/cs?searchtype=author&query=Oguz%2C+B">Baris Oguz</a>, <a href="/search/cs?searchtype=author&query=Pouch%2C+A">Alison Pouch</a>, <a href="/search/cs?searchtype=author&query=Schwartz%2C+N">Nadav Schwartz</a>, <a href="/search/cs?searchtype=author&query=Byram%2C+B+C">Brett C Byram</a>, <a href="/search/cs?searchtype=author&query=Oguz%2C+I">Ipek Oguz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06750v1-abstract-short" style="display: inline;"> Ultrasound (US) image stitching can expand the field-of-view (FOV) by combining multiple US images from varied probe positions. However, registering US images with only partially overlapping anatomical contents is a challenging task. In this work, we introduce SynStitch, a self-supervised framework designed for 2DUS stitching. SynStitch consists of a synthetic stitching pair generation module (SSP… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06750v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06750v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06750v1-abstract-full" style="display: none;"> Ultrasound (US) image stitching can expand the field-of-view (FOV) by combining multiple US images from varied probe positions. However, registering US images with only partially overlapping anatomical contents is a challenging task. In this work, we introduce SynStitch, a self-supervised framework designed for 2DUS stitching. SynStitch consists of a synthetic stitching pair generation module (SSPGM) and an image stitching module (ISM). SSPGM utilizes a patch-conditioned ControlNet to generate realistic 2DUS stitching pairs with known affine matrix from a single input image. ISM then utilizes this synthetic paired data to learn 2DUS stitching in a supervised manner. Our framework was evaluated against multiple leading methods on a kidney ultrasound dataset, demonstrating superior 2DUS stitching performance through both qualitative and quantitative analyses. The code will be made public upon acceptance of the paper. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06750v1-abstract-full').style.display = 'none'; document.getElementById('2411.06750v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.23805">arXiv:2410.23805</a> <span> [<a href="https://arxiv.org/pdf/2410.23805">pdf</a>, <a href="https://arxiv.org/format/2410.23805">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> </div> </div> <p class="title is-5 mathjax"> MemANNS: Enhancing Billion-Scale ANNS Efficiency with Practical PIM Hardware </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+S">Sitian Chen</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+A+C">Amelie Chi Zhou</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+Y">Yucheng Shi</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yusen Li</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xin Yao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.23805v1-abstract-short" style="display: inline;"> In numerous production environments, Approximate Nearest Neighbor Search (ANNS) plays an indispensable role, particularly when dealing with massive datasets that can contain billions of entries. The necessity for rapid response times in these applications makes the efficiency of ANNS algorithms crucial. However, traditional ANNS approaches encounter substantial challenges at the billion-scale leve… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.23805v1-abstract-full').style.display = 'inline'; document.getElementById('2410.23805v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.23805v1-abstract-full" style="display: none;"> In numerous production environments, Approximate Nearest Neighbor Search (ANNS) plays an indispensable role, particularly when dealing with massive datasets that can contain billions of entries. The necessity for rapid response times in these applications makes the efficiency of ANNS algorithms crucial. However, traditional ANNS approaches encounter substantial challenges at the billion-scale level. CPU-based methods are hindered by the limitations of memory bandwidth, while GPU-based methods struggle with memory capacity and resource utilization efficiency. This paper introduces MemANNS, an innovative framework that utilizes UPMEM PIM architecture to address the memory bottlenecks in ANNS algorithms at scale. We concentrate on optimizing a well-known ANNS algorithm, IVFPQ, for PIM hardware through several techniques. First, we introduce an architecture-aware strategy for data placement and query scheduling that ensures an even distribution of workload across PIM chips, thereby maximizing the use of aggregated memory bandwidth. Additionally, we have developed an efficient thread scheduling mechanism that capitalizes on PIM's multi-threading capabilities and enhances memory management to boost cache efficiency. Moreover, we have recognized that real-world datasets often feature vectors with frequently co-occurring items. To address this, we propose a novel encoding method for IVFPQ that minimizes memory accesses during query processing. Our comprehensive evaluation using actual PIM hardware and real-world datasets at the billion-scale, show that MemANNS offers a significant 4.3x increase in QPS over CPU-based Faiss, and it matches the performance of GPU-based Faiss implementations. Additionally, MemANNS improves energy efficiency, with a 2.3x enhancement in QPS/Watt compared to GPU solutions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.23805v1-abstract-full').style.display = 'none'; document.getElementById('2410.23805v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.20047">arXiv:2410.20047</a> <span> [<a href="https://arxiv.org/pdf/2410.20047">pdf</a>, <a href="https://arxiv.org/format/2410.20047">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> ResAD: A Simple Framework for Class Generalizable Anomaly Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xincheng Yao</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zixin Chen</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+C">Chao Gao</a>, <a href="/search/cs?searchtype=author&query=Zhai%2C+G">Guangtao Zhai</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+C">Chongyang Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.20047v1-abstract-short" style="display: inline;"> This paper explores the problem of class-generalizable anomaly detection, where the objective is to train one unified AD model that can generalize to detect anomalies in diverse classes from different domains without any retraining or fine-tuning on the target data. Because normal feature representations vary significantly across classes, this will cause the widely studied one-for-one AD models to… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20047v1-abstract-full').style.display = 'inline'; document.getElementById('2410.20047v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.20047v1-abstract-full" style="display: none;"> This paper explores the problem of class-generalizable anomaly detection, where the objective is to train one unified AD model that can generalize to detect anomalies in diverse classes from different domains without any retraining or fine-tuning on the target data. Because normal feature representations vary significantly across classes, this will cause the widely studied one-for-one AD models to be poorly classgeneralizable (i.e., performance drops dramatically when used for new classes). In this work, we propose a simple but effective framework (called ResAD) that can be directly applied to detect anomalies in new classes. Our main insight is to learn the residual feature distribution rather than the initial feature distribution. In this way, we can significantly reduce feature variations. Even in new classes, the distribution of normal residual features would not remarkably shift from the learned distribution. Therefore, the learned model can be directly adapted to new classes. ResAD consists of three components: (1) a Feature Converter that converts initial features into residual features; (2) a simple and shallow Feature Constraintor that constrains normal residual features into a spatial hypersphere for further reducing feature variations and maintaining consistency in feature scales among different classes; (3) a Feature Distribution Estimator that estimates the normal residual feature distribution, anomalies can be recognized as out-of-distribution. Despite the simplicity, ResAD can achieve remarkable anomaly detection results when directly used in new classes. The code is available at https://github.com/xcyao00/ResAD. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20047v1-abstract-full').style.display = 'none'; document.getElementById('2410.20047v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper was accepted as a spotlight papaer by NeurIPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.17333">arXiv:2410.17333</a> <span> [<a href="https://arxiv.org/pdf/2410.17333">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Are Large Language Models Ready for Travel Planning? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ren%2C+R">Ruiping Ren</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xing Yao</a>, <a href="/search/cs?searchtype=author&query=Cole%2C+S">Shu Cole</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Haining Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.17333v1-abstract-short" style="display: inline;"> While large language models (LLMs) show promise in hospitality and tourism, their ability to provide unbiased service across demographic groups remains unclear. This paper explores gender and ethnic biases when LLMs are utilized as travel planning assistants. To investigate this issue, we apply machine learning techniques to analyze travel suggestions generated from three open-source LLMs. Our fin… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.17333v1-abstract-full').style.display = 'inline'; document.getElementById('2410.17333v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.17333v1-abstract-full" style="display: none;"> While large language models (LLMs) show promise in hospitality and tourism, their ability to provide unbiased service across demographic groups remains unclear. This paper explores gender and ethnic biases when LLMs are utilized as travel planning assistants. To investigate this issue, we apply machine learning techniques to analyze travel suggestions generated from three open-source LLMs. Our findings reveal that the performance of race and gender classifiers substantially exceeds random chance, indicating differences in how LLMs engage with varied subgroups. Specifically, outputs align with cultural expectations tied to certain races and genders. To minimize the effect of these stereotypes, we used a stop-word classification strategy, which decreased identifiable differences, with no disrespectful terms found. However, hallucinations related to African American and gender minority groups were noted. In conclusion, while LLMs can generate travel plans seemingly free from bias, it remains essential to verify the accuracy and appropriateness of their recommendations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.17333v1-abstract-full').style.display = 'none'; document.getElementById('2410.17333v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.07677">arXiv:2410.07677</a> <span> [<a href="https://arxiv.org/pdf/2410.07677">pdf</a>, <a href="https://arxiv.org/format/2410.07677">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Smart Audit System Empowered by LLM </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xu Yao</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+X">Xiaoxu Wu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xi Li</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+H">Huan Xu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+C">Chenlei Li</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+P">Ping Huang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S">Si Li</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+X">Xiaoning Ma</a>, <a href="/search/cs?searchtype=author&query=Shan%2C+J">Jiulong Shan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.07677v1-abstract-short" style="display: inline;"> Manufacturing quality audits are pivotal for ensuring high product standards in mass production environments. Traditional auditing processes, however, are labor-intensive and reliant on human expertise, posing challenges in maintaining transparency, accountability, and continuous improvement across complex global supply chains. To address these challenges, we propose a smart audit system empowered… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.07677v1-abstract-full').style.display = 'inline'; document.getElementById('2410.07677v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.07677v1-abstract-full" style="display: none;"> Manufacturing quality audits are pivotal for ensuring high product standards in mass production environments. Traditional auditing processes, however, are labor-intensive and reliant on human expertise, posing challenges in maintaining transparency, accountability, and continuous improvement across complex global supply chains. To address these challenges, we propose a smart audit system empowered by large language models (LLMs). Our approach introduces three innovations: a dynamic risk assessment model that streamlines audit procedures and optimizes resource allocation; a manufacturing compliance copilot that enhances data processing, retrieval, and evaluation for a self-evolving manufacturing knowledge base; and a Re-act framework commonality analysis agent that provides real-time, customized analysis to empower engineers with insights for supplier improvement. These enhancements elevate audit efficiency and effectiveness, with testing scenarios demonstrating an improvement of over 24%. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.07677v1-abstract-full').style.display = 'none'; document.getElementById('2410.07677v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.06509">arXiv:2410.06509</a> <span> [<a href="https://arxiv.org/pdf/2410.06509">pdf</a>, <a href="https://arxiv.org/format/2410.06509">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> PFAttack: Stealthy Attack Bypassing Group Fairness in Federated Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gao%2C+J">Jiashi Gao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Ziwei Wang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+X">Xiangyu Zhao</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xin Yao</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+X">Xuetao Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.06509v1-abstract-short" style="display: inline;"> Federated learning (FL), integrating group fairness mechanisms, allows multiple clients to collaboratively train a global model that makes unbiased decisions for different populations grouped by sensitive attributes (e.g., gender and race). Due to its distributed nature, previous studies have demonstrated that FL systems are vulnerable to model poisoning attacks. However, these studies primarily f… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.06509v1-abstract-full').style.display = 'inline'; document.getElementById('2410.06509v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.06509v1-abstract-full" style="display: none;"> Federated learning (FL), integrating group fairness mechanisms, allows multiple clients to collaboratively train a global model that makes unbiased decisions for different populations grouped by sensitive attributes (e.g., gender and race). Due to its distributed nature, previous studies have demonstrated that FL systems are vulnerable to model poisoning attacks. However, these studies primarily focus on perturbing accuracy, leaving a critical question unexplored: Can an attacker bypass the group fairness mechanisms in FL and manipulate the global model to be biased? The motivations for such an attack vary; an attacker might seek higher accuracy, yet fairness considerations typically limit the accuracy of the global model or aim to cause ethical disruption. To address this question, we design a novel form of attack in FL, termed Profit-driven Fairness Attack (PFATTACK), which aims not to degrade global model accuracy but to bypass fairness mechanisms. Our fundamental insight is that group fairness seeks to weaken the dependence of outputs on input attributes related to sensitive information. In the proposed PFATTACK, an attacker can recover this dependence through local fine-tuning across various sensitive groups, thereby creating a biased yet accuracy-preserving malicious model and injecting it into FL through model replacement. Compared to attacks targeting accuracy, PFATTACK is more stealthy. The malicious model in PFATTACK exhibits subtle parameter variations relative to the original global model, making it robust against detection and filtering by Byzantine-resilient aggregations. Extensive experiments on benchmark datasets are conducted for four fair FL frameworks and three Byzantine-resilient aggregations against model poisoning, demonstrating the effectiveness and stealth of PFATTACK in bypassing group fairness mechanisms in FL. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.06509v1-abstract-full').style.display = 'none'; document.getElementById('2410.06509v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.04648">arXiv:2410.04648</a> <span> [<a href="https://arxiv.org/pdf/2410.04648">pdf</a>, <a href="https://arxiv.org/format/2410.04648">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> AdaptDiff: Cross-Modality Domain Adaptation via Weak Conditional Semantic Diffusion for Retinal Vessel Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hu%2C+D">Dewei Hu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+H">Hao Li</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+H">Han Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jiacheng Wang</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xing Yao</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+D">Daiwei Lu</a>, <a href="/search/cs?searchtype=author&query=Oguz%2C+I">Ipek Oguz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.04648v1-abstract-short" style="display: inline;"> Deep learning has shown remarkable performance in medical image segmentation. However, despite its promise, deep learning has many challenges in practice due to its inability to effectively transition to unseen domains, caused by the inherent data distribution shift and the lack of manual annotations to guide domain adaptation. To tackle this problem, we present an unsupervised domain adaptation (… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04648v1-abstract-full').style.display = 'inline'; document.getElementById('2410.04648v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.04648v1-abstract-full" style="display: none;"> Deep learning has shown remarkable performance in medical image segmentation. However, despite its promise, deep learning has many challenges in practice due to its inability to effectively transition to unseen domains, caused by the inherent data distribution shift and the lack of manual annotations to guide domain adaptation. To tackle this problem, we present an unsupervised domain adaptation (UDA) method named AdaptDiff that enables a retinal vessel segmentation network trained on fundus photography (FP) to produce satisfactory results on unseen modalities (e.g., OCT-A) without any manual labels. For all our target domains, we first adopt a segmentation model trained on the source domain to create pseudo-labels. With these pseudo-labels, we train a conditional semantic diffusion probabilistic model to represent the target domain distribution. Experimentally, we show that even with low quality pseudo-labels, the diffusion model can still capture the conditional semantic information. Subsequently, we sample on the target domain with binary vessel masks from the source domain to get paired data, i.e., target domain synthetic images conditioned on the binary vessel map. Finally, we fine-tune the pre-trained segmentation network using the synthetic paired data to mitigate the domain gap. We assess the effectiveness of AdaptDiff on seven publicly available datasets across three distinct modalities. Our results demonstrate a significant improvement in segmentation performance across all unseen datasets. Our code is publicly available at https://github.com/DeweiHu/AdaptDiff. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04648v1-abstract-full').style.display = 'none'; document.getElementById('2410.04648v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.02682">arXiv:2410.02682</a> <span> [<a href="https://arxiv.org/pdf/2410.02682">pdf</a>, <a href="https://arxiv.org/format/2410.02682">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> EinDecomp: Decomposition of Declaratively-Specified Machine Learning and Numerical Computations for Parallel Execution </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Bourgeois%2C+D">Daniel Bourgeois</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+Z">Zhimin Ding</a>, <a href="/search/cs?searchtype=author&query=Jankov%2C+D">Dimitrije Jankov</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jiehui Li</a>, <a href="/search/cs?searchtype=author&query=Sleem%2C+M">Mahmoud Sleem</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+Y">Yuxin Tang</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+J">Jiawen Yao</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xinyu Yao</a>, <a href="/search/cs?searchtype=author&query=Jermaine%2C+C">Chris Jermaine</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.02682v1-abstract-short" style="display: inline;"> We consider the problem of automatically decomposing operations over tensors or arrays so that they can be executed in parallel on multiple devices. We address two, closely-linked questions. First, what programming abstraction should systems for tensor-based computing offer to enable such decompositions? Second, given that abstraction, how should such systems automatically decompose a tensor-based… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02682v1-abstract-full').style.display = 'inline'; document.getElementById('2410.02682v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.02682v1-abstract-full" style="display: none;"> We consider the problem of automatically decomposing operations over tensors or arrays so that they can be executed in parallel on multiple devices. We address two, closely-linked questions. First, what programming abstraction should systems for tensor-based computing offer to enable such decompositions? Second, given that abstraction, how should such systems automatically decompose a tensor-based computation? We assert that tensor-based systems should offer a programming abstraction based on an extended Einstein summation notation, which is a fully declarative, mathematical specification for tensor computations. We show that any computation specified in the Einstein summation notation can be re-written into an equivalent tensor-relational computation, and this re-write generalizes existing notations of tensor parallelism such as "data parallel'' and "model parallel.'' We consider the algorithmic problem of optimally computing a tensor-relational decomposition of a graph of operations specified in our extended Einstein summation notation, and we experimentally show the value of the algorithm that we develop. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02682v1-abstract-full').style.display = 'none'; document.getElementById('2410.02682v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.02247">arXiv:2410.02247</a> <span> [<a href="https://arxiv.org/pdf/2410.02247">pdf</a>, <a href="https://arxiv.org/format/2410.02247">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Theoretical Insights into Fine-Tuning Attention Mechanism: Generalization and Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xinhao Yao</a>, <a href="/search/cs?searchtype=author&query=Qian%2C+H">Hongjin Qian</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+X">Xiaolin Hu</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+G">Gengze Xu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yong Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.02247v1-abstract-short" style="display: inline;"> Large Language Models (LLMs), built on Transformer architectures, exhibit remarkable generalization across a wide range of tasks. However, fine-tuning these models for specific tasks remains resource-intensive due to their extensive parameterization. In this paper, we investigate two remarkable phenomena observed during the fine-tuning of LLMs, particularly focusing on the attention mechanism: (1)… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02247v1-abstract-full').style.display = 'inline'; document.getElementById('2410.02247v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.02247v1-abstract-full" style="display: none;"> Large Language Models (LLMs), built on Transformer architectures, exhibit remarkable generalization across a wide range of tasks. However, fine-tuning these models for specific tasks remains resource-intensive due to their extensive parameterization. In this paper, we investigate two remarkable phenomena observed during the fine-tuning of LLMs, particularly focusing on the attention mechanism: (1) Different Impact, optimizing the $\mathbf{W}_v$ matrix significantly improves performance over optimizing the $\mathbf{W}_k$ matrix. Fine-tuning only the $\mathbf{W}_q$ and $\mathbf{W}_v$ matrices is computationally efficient, delivering results that are comparable to, or even better than, fine-tuning all three matrices $\mathbf{W}_q$, $\mathbf{W}_k$, and $\mathbf{W}_v$. (2) Efficient Convergence, employing distinct learning rates for these matrices is crucial for optimal performance, with a higher learning rate for the $\mathbf{W}_v$ matrix expediting convergence. However, theoretical analyses of these phenomena are still relatively limited. We present a theoretical analysis of these phenomena from two perspectives: (i) Generalization, where we demonstrate that fine-tuning only $\mathbf{W}_q$ and $\mathbf{W}_v$ improves generalization bounds, enhances memory efficiency, and (ii) Optimization, where we emphasize that the feature learning of the attention mechanism is efficient, particularly when using distinct learning rates for the matrices, which leads to more effective fine-tuning. Building on these insights, we propose a new strategy that improves fine-tuning efficiency in terms of both storage and time. Experimental results on benchmark datasets validate the effectiveness of this approach, supporting our theoretical findings. Our analysis lays the theoretical groundwork for configuring and improving lightweight algorithms in LLMs fine-tuning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02247v1-abstract-full').style.display = 'none'; document.getElementById('2410.02247v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.18499">arXiv:2409.18499</a> <span> [<a href="https://arxiv.org/pdf/2409.18499">pdf</a>, <a href="https://arxiv.org/format/2409.18499">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TEVC.2024.3430824">10.1109/TEVC.2024.3430824 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Fairness-aware Multiobjective Evolutionary Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Q">Qingquan Zhang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jialin Liu</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xin Yao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.18499v1-abstract-short" style="display: inline;"> Multiobjective evolutionary learning (MOEL) has demonstrated its advantages of training fairer machine learning models considering a predefined set of conflicting objectives, including accuracy and different fairness measures. Recent works propose to construct a representative subset of fairness measures as optimisation objectives of MOEL throughout model training. However, the determination of a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.18499v1-abstract-full').style.display = 'inline'; document.getElementById('2409.18499v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.18499v1-abstract-full" style="display: none;"> Multiobjective evolutionary learning (MOEL) has demonstrated its advantages of training fairer machine learning models considering a predefined set of conflicting objectives, including accuracy and different fairness measures. Recent works propose to construct a representative subset of fairness measures as optimisation objectives of MOEL throughout model training. However, the determination of a representative measure set relies on dataset, prior knowledge and requires substantial computational costs. What's more, those representative measures may differ across different model training processes. Instead of using a static predefined set determined before model training, this paper proposes to dynamically and adaptively determine a representative measure set online during model training. The dynamically determined representative set is then used as optimising objectives of the MOEL framework and can vary with time. Extensive experimental results on 12 well-known benchmark datasets demonstrate that our proposed framework achieves outstanding performance compared to state-of-the-art approaches for mitigating unfairness in terms of accuracy as well as 25 fairness measures although only a few of them were dynamically selected and used as optimisation objectives. The results indicate the importance of setting optimisation objectives dynamically during training. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.18499v1-abstract-full').style.display = 'none'; document.getElementById('2409.18499v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Transactions on Evolutionary Computation (2014) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.11414">arXiv:2409.11414</a> <span> [<a href="https://arxiv.org/pdf/2409.11414">pdf</a>, <a href="https://arxiv.org/format/2409.11414">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> RTLRewriter: Methodologies for Large Models aided RTL Code Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xufeng Yao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yiwen Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xing Li</a>, <a href="/search/cs?searchtype=author&query=Lian%2C+Y">Yingzhao Lian</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+R">Ran Chen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+L">Lei Chen</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+M">Mingxuan Yuan</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+H">Hong Xu</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+B">Bei Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.11414v1-abstract-short" style="display: inline;"> Register Transfer Level (RTL) code optimization is crucial for enhancing the efficiency and performance of digital circuits during early synthesis stages. Currently, optimization relies heavily on manual efforts by skilled engineers, often requiring multiple iterations based on synthesis feedback. In contrast, existing compiler-based methods fall short in addressing complex designs. This paper int… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.11414v1-abstract-full').style.display = 'inline'; document.getElementById('2409.11414v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.11414v1-abstract-full" style="display: none;"> Register Transfer Level (RTL) code optimization is crucial for enhancing the efficiency and performance of digital circuits during early synthesis stages. Currently, optimization relies heavily on manual efforts by skilled engineers, often requiring multiple iterations based on synthesis feedback. In contrast, existing compiler-based methods fall short in addressing complex designs. This paper introduces RTLRewriter, an innovative framework that leverages large models to optimize RTL code. A circuit partition pipeline is utilized for fast synthesis and efficient rewriting. A multi-modal program analysis is proposed to incorporate vital visual diagram information as optimization cues. A specialized search engine is designed to identify useful optimization guides, algorithms, and code snippets that enhance the model ability to generate optimized RTL. Additionally, we introduce a Cost-aware Monte Carlo Tree Search (C-MCTS) algorithm for efficient rewriting, managing diverse retrieved contents and steering the rewriting results. Furthermore, a fast verification pipeline is proposed to reduce verification cost. To cater to the needs of both industry and academia, we propose two benchmarking suites: the Large Rewriter Benchmark, targeting complex scenarios with extensive circuit partitioning, optimization trade-offs, and verification challenges, and the Small Rewriter Benchmark, designed for a wider range of scenarios and patterns. Our comparative analysis with established compilers such as Yosys and E-graph demonstrates significant improvements, highlighting the benefits of integrating large models into the early stages of circuit design. We provide our benchmarks at https://github.com/yaoxufeng/RTLRewriter-Bench. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.11414v1-abstract-full').style.display = 'none'; document.getElementById('2409.11414v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICCAD2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.07723">arXiv:2409.07723</a> <span> [<a href="https://arxiv.org/pdf/2409.07723">pdf</a>, <a href="https://arxiv.org/format/2409.07723">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Advancing Depth Anything Model for Unsupervised Monocular Depth Estimation in Endoscopy </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+B">Bojian Li</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+B">Bo Liu</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xinning Yao</a>, <a href="/search/cs?searchtype=author&query=Yue%2C+J">Jinghua Yue</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+F">Fugen Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.07723v2-abstract-short" style="display: inline;"> Depth estimation is a cornerstone of 3D reconstruction and plays a vital role in minimally invasive endoscopic surgeries. However, most current depth estimation networks rely on traditional convolutional neural networks, which are limited in their ability to capture global information. Foundation models offer a promising approach to enhance depth estimation, but those models currently available ar… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.07723v2-abstract-full').style.display = 'inline'; document.getElementById('2409.07723v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.07723v2-abstract-full" style="display: none;"> Depth estimation is a cornerstone of 3D reconstruction and plays a vital role in minimally invasive endoscopic surgeries. However, most current depth estimation networks rely on traditional convolutional neural networks, which are limited in their ability to capture global information. Foundation models offer a promising approach to enhance depth estimation, but those models currently available are primarily trained on natural images, leading to suboptimal performance when applied to endoscopic images. In this work, we introduce a novel fine-tuning strategy for the Depth Anything Model and integrate it with an intrinsic-based unsupervised monocular depth estimation framework. Our approach includes a low-rank adaptation technique based on random vectors, which improves the model's adaptability to different scales. Additionally, we propose a residual block built on depthwise separable convolution to compensate for the transformer's limited ability to capture local features. Our experimental results on the SCARED dataset and Hamlyn dataset show that our method achieves state-of-the-art performance while minimizing the number of trainable parameters. Applying this method in minimally invasive endoscopic surgery can enhance surgeons' spatial awareness, thereby improving the precision and safety of the procedures. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.07723v2-abstract-full').style.display = 'none'; document.getElementById('2409.07723v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.13978">arXiv:2408.13978</a> <span> [<a href="https://arxiv.org/pdf/2408.13978">pdf</a>, <a href="https://arxiv.org/format/2408.13978">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Histology Virtual Staining with Mask-Guided Adversarial Transfer Learning for Tertiary Lymphoid Structure Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+Q">Qiuli Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yongxu Liu</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+L">Li Ma</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xianqi Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+W">Wei Chen</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xiaohong Yao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.13978v1-abstract-short" style="display: inline;"> Histological Tertiary Lymphoid Structures (TLSs) are increasingly recognized for their correlation with the efficacy of immunotherapy in various solid tumors. Traditionally, the identification and characterization of TLSs rely on immunohistochemistry (IHC) staining techniques, utilizing markers such as CD20 for B cells. Despite the specificity of IHC, Hematoxylin-Eosin (H&E) staining offers a more… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.13978v1-abstract-full').style.display = 'inline'; document.getElementById('2408.13978v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.13978v1-abstract-full" style="display: none;"> Histological Tertiary Lymphoid Structures (TLSs) are increasingly recognized for their correlation with the efficacy of immunotherapy in various solid tumors. Traditionally, the identification and characterization of TLSs rely on immunohistochemistry (IHC) staining techniques, utilizing markers such as CD20 for B cells. Despite the specificity of IHC, Hematoxylin-Eosin (H&E) staining offers a more accessible and cost-effective choice. Capitalizing on the prevalence of H&E staining slides, we introduce a novel Mask-Guided Adversarial Transfer Learning method designed for virtual pathological staining. This method adeptly captures the nuanced color variations across diverse tissue types under various staining conditions, such as nucleus, red blood cells, positive reaction regions, without explicit label information, and adeptly synthesizes realistic IHC-like virtual staining patches, even replicating the positive reaction. Further, we propose the Virtual IHC Pathology Analysis Network (VIPA-Net), an integrated framework encompassing a Mask-Guided Transfer Module and an H&E-Based Virtual Staining TLS Detection Module. VIPA-Net synergistically harnesses both H\&E staining slides and the synthesized virtual IHC patches to enhance the detection of TLSs within H&E Whole Slide Images (WSIs). We evaluate the network with a comprehensive dataset comprising 1019 annotated slides from The Cancer Genome Atlas (TCGA). Experimental results compellingly illustrate that the VIPA-Net substantially elevates TLS detection accuracy, effectively circumventing the need for actual CD20 staining across the public dataset. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.13978v1-abstract-full').style.display = 'none'; document.getElementById('2408.13978v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 8 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.05617">arXiv:2408.05617</a> <span> [<a href="https://arxiv.org/pdf/2408.05617">pdf</a>, <a href="https://arxiv.org/format/2408.05617">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> </div> </div> <p class="title is-5 mathjax"> Residual-INR: Communication Efficient On-Device Learning Using Implicit Neural Representation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+H">Hanqiu Chen</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xuebin Yao</a>, <a href="/search/cs?searchtype=author&query=Subedi%2C+P">Pradeep Subedi</a>, <a href="/search/cs?searchtype=author&query=Hao%2C+C">Cong Hao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.05617v3-abstract-short" style="display: inline;"> Edge computing is a distributed computing paradigm that collects and processes data at or near the source of data generation. The on-device learning at edge relies on device-to-device wireless communication to facilitate real-time data sharing and collaborative decision-making among multiple devices. This significantly improves the adaptability of the edge computing system to the changing environm… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.05617v3-abstract-full').style.display = 'inline'; document.getElementById('2408.05617v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.05617v3-abstract-full" style="display: none;"> Edge computing is a distributed computing paradigm that collects and processes data at or near the source of data generation. The on-device learning at edge relies on device-to-device wireless communication to facilitate real-time data sharing and collaborative decision-making among multiple devices. This significantly improves the adaptability of the edge computing system to the changing environments. However, as the scale of the edge computing system is getting larger, communication among devices is becoming the bottleneck because of the limited bandwidth of wireless communication leads to large data transfer latency. To reduce the amount of device-to-device data transmission and accelerate on-device learning, in this paper, we propose Residual-INR, a fog computing-based communication-efficient on-device learning framework by utilizing implicit neural representation (INR) to compress images/videos into neural network weights. Residual-INR enhances data transfer efficiency by collecting JPEG images from edge devices, compressing them into INR format at the fog node, and redistributing them for on-device learning. By using a smaller INR for full image encoding and a separate object INR for high-quality object region reconstruction through residual encoding, our technique can reduce the encoding redundancy while maintaining the object quality. Residual-INR is a promising solution for edge on-device learning because it reduces data transmission by up to 5.16 x across a network of 10 edge devices. It also facilitates CPU-free accelerated on-device learning, achieving up to 2.9 x speedup without sacrificing accuracy. Our code is available at: https://github.com/sharc-lab/Residual-INR. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.05617v3-abstract-full').style.display = 'none'; document.getElementById('2408.05617v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper has been accepted by ICCAD 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.05372">arXiv:2408.05372</a> <span> [<a href="https://arxiv.org/pdf/2408.05372">pdf</a>, <a href="https://arxiv.org/format/2408.05372">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> PRISM Lite: A lightweight model for interactive 3D placenta segmentation in ultrasound </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+H">Hao Li</a>, <a href="/search/cs?searchtype=author&query=Oguz%2C+B">Baris Oguz</a>, <a href="/search/cs?searchtype=author&query=Arenas%2C+G">Gabriel Arenas</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xing Yao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jiacheng Wang</a>, <a href="/search/cs?searchtype=author&query=Pouch%2C+A">Alison Pouch</a>, <a href="/search/cs?searchtype=author&query=Byram%2C+B">Brett Byram</a>, <a href="/search/cs?searchtype=author&query=Schwartz%2C+N">Nadav Schwartz</a>, <a href="/search/cs?searchtype=author&query=Oguz%2C+I">Ipek Oguz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.05372v1-abstract-short" style="display: inline;"> Placenta volume measured from 3D ultrasound (3DUS) images is an important tool for tracking the growth trajectory and is associated with pregnancy outcomes. Manual segmentation is the gold standard, but it is time-consuming and subjective. Although fully automated deep learning algorithms perform well, they do not always yield high-quality results for each case. Interactive segmentation models cou… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.05372v1-abstract-full').style.display = 'inline'; document.getElementById('2408.05372v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.05372v1-abstract-full" style="display: none;"> Placenta volume measured from 3D ultrasound (3DUS) images is an important tool for tracking the growth trajectory and is associated with pregnancy outcomes. Manual segmentation is the gold standard, but it is time-consuming and subjective. Although fully automated deep learning algorithms perform well, they do not always yield high-quality results for each case. Interactive segmentation models could address this issue. However, there is limited work on interactive segmentation models for the placenta. Despite their segmentation accuracy, these methods may not be feasible for clinical use as they require relatively large computational power which may be especially prohibitive in low-resource environments, or on mobile devices. In this paper, we propose a lightweight interactive segmentation model aiming for clinical use to interactively segment the placenta from 3DUS images in real-time. The proposed model adopts the segmentation from our fully automated model for initialization and is designed in a human-in-the-loop manner to achieve iterative improvements. The Dice score and normalized surface Dice are used as evaluation metrics. The results show that our model can achieve superior performance in segmentation compared to state-of-the-art models while using significantly fewer parameters. Additionally, the proposed model is much faster for inference and robust to poor initial masks. The code is available at https://github.com/MedICL-VU/PRISM-placenta. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.05372v1-abstract-full').style.display = 'none'; document.getElementById('2408.05372v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.20272">arXiv:2407.20272</a> <span> [<a href="https://arxiv.org/pdf/2407.20272">pdf</a>, <a href="https://arxiv.org/format/2407.20272">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> An Efficient Inference Framework for Early-exit Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Miao%2C+R">Ruijie Miao</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+Y">Yihan Yan</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xinshuo Yao</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+T">Tong Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.20272v1-abstract-short" style="display: inline;"> Building efficient inference framework has gained increasing interests for research community. Early-exit models, a variant of LLMs, improves the inference efficiency of LLMs by skipping rest layers and directly generate output tokens when they are confident enough. However, there is no work of LLM inference framework that takes early-exit models into consideration. This is non-trivial as prior ar… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.20272v1-abstract-full').style.display = 'inline'; document.getElementById('2407.20272v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.20272v1-abstract-full" style="display: none;"> Building efficient inference framework has gained increasing interests for research community. Early-exit models, a variant of LLMs, improves the inference efficiency of LLMs by skipping rest layers and directly generate output tokens when they are confident enough. However, there is no work of LLM inference framework that takes early-exit models into consideration. This is non-trivial as prior art on LLM inference cannot be directly applied to early-exit models. In this work, we solves two key challenges in building efficient inference framework for early-exit models: (1) batch inference at iteration-level granularity; and (2) KV cache management. For the former, we propose to process the batch until all sequences surpass the early-exit confidence threshold. For the latter, we propose to fill the KV cache of rest layers before the iteration terminates. Our evaluation shows that, compared with the original vLLM operating at full layers, our solution achieves up to 1.25x speed up. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.20272v1-abstract-full').style.display = 'none'; document.getElementById('2407.20272v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.18362">arXiv:2407.18362</a> <span> [<a href="https://arxiv.org/pdf/2407.18362">pdf</a>, <a href="https://arxiv.org/format/2407.18362">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Retinal IPA: Iterative KeyPoints Alignment for Multimodal Retinal Imaging </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jiacheng Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+H">Hao Li</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+D">Dewei Hu</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+R">Rui Xu</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xing Yao</a>, <a href="/search/cs?searchtype=author&query=Tao%2C+Y+K">Yuankai K. Tao</a>, <a href="/search/cs?searchtype=author&query=Oguz%2C+I">Ipek Oguz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.18362v1-abstract-short" style="display: inline;"> We propose a novel framework for retinal feature point alignment, designed for learning cross-modality features to enhance matching and registration across multi-modality retinal images. Our model draws on the success of previous learning-based feature detection and description methods. To better leverage unlabeled data and constrain the model to reproduce relevant keypoints, we integrate a keypoi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.18362v1-abstract-full').style.display = 'inline'; document.getElementById('2407.18362v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.18362v1-abstract-full" style="display: none;"> We propose a novel framework for retinal feature point alignment, designed for learning cross-modality features to enhance matching and registration across multi-modality retinal images. Our model draws on the success of previous learning-based feature detection and description methods. To better leverage unlabeled data and constrain the model to reproduce relevant keypoints, we integrate a keypoint-based segmentation task. It is trained in a self-supervised manner by enforcing segmentation consistency between different augmentations of the same image. By incorporating a keypoint augmented self-supervised layer, we achieve robust feature extraction across modalities. Extensive evaluation on two public datasets and one in-house dataset demonstrates significant improvements in performance for modality-agnostic retinal feature alignment. Our code and model weights are publicly available at \url{https://github.com/MedICL-VU/RetinaIPA}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.18362v1-abstract-full').style.display = 'none'; document.getElementById('2407.18362v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.17518">arXiv:2407.17518</a> <span> [<a href="https://arxiv.org/pdf/2407.17518">pdf</a>, <a href="https://arxiv.org/format/2407.17518">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Driving pattern interpretation based on action phases clustering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xue Yao</a>, <a href="/search/cs?searchtype=author&query=Calvert%2C+S+C">Simeon C. Calvert</a>, <a href="/search/cs?searchtype=author&query=Hoogendoorn%2C+S+P">Serge P. Hoogendoorn</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.17518v1-abstract-short" style="display: inline;"> Current approaches to identifying driving heterogeneity face challenges in comprehending fundamental patterns from the perspective of underlying driving behavior mechanisms. The concept of Action phases was proposed in our previous work, capturing the diversity of driving characteristics with physical meanings. This study presents a novel framework to further interpret driving patterns by classify… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.17518v1-abstract-full').style.display = 'inline'; document.getElementById('2407.17518v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.17518v1-abstract-full" style="display: none;"> Current approaches to identifying driving heterogeneity face challenges in comprehending fundamental patterns from the perspective of underlying driving behavior mechanisms. The concept of Action phases was proposed in our previous work, capturing the diversity of driving characteristics with physical meanings. This study presents a novel framework to further interpret driving patterns by classifying Action phases in an unsupervised manner. In this framework, a Resampling and Downsampling Method (RDM) is first applied to standardize the length of Action phases. Then the clustering calibration procedure including ''Feature Selection'', ''Clustering Analysis'', ''Difference/Similarity Evaluation'', and ''Action phases Re-extraction'' is iteratively applied until all differences among clusters and similarities within clusters reach the pre-determined criteria. Application of the framework using real-world datasets revealed six driving patterns in the I80 dataset, labeled as ''Catch up'', ''Keep away'', and ''Maintain distance'', with both ''Stable'' and ''Unstable'' states. Notably, Unstable patterns are more numerous than Stable ones. ''Maintain distance'' is the most common among Stable patterns. These observations align with the dynamic nature of driving. Two patterns ''Stable keep away'' and ''Unstable catch up'' are missing in the US101 dataset, which is in line with our expectations as this dataset was previously shown to have less heterogeneity. This demonstrates the potential of driving patterns in describing driving heterogeneity. The proposed framework promises advantages in addressing label scarcity in supervised learning and enhancing tasks such as driving behavior modeling and driving trajectory prediction. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.17518v1-abstract-full').style.display = 'none'; document.getElementById('2407.17518v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.12025">arXiv:2407.12025</a> <span> [<a href="https://arxiv.org/pdf/2407.12025">pdf</a>, <a href="https://arxiv.org/format/2407.12025">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> LLM4DESIGN: An Automated Multi-Modal System for Architectural and Environmental Design </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+R">Ran Chen</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xueqi Yao</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+X">Xuhui Jiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.12025v1-abstract-short" style="display: inline;"> This study introduces LLM4DESIGN, a highly automated system for generating architectural and environmental design proposals. LLM4DESIGN, relying solely on site conditions and design requirements, employs Multi-Agent systems to foster creativity, Retrieval Augmented Generation (RAG) to ground designs in realism, and Visual Language Models (VLM) to synchronize all information. This system resulting… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.12025v1-abstract-full').style.display = 'inline'; document.getElementById('2407.12025v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.12025v1-abstract-full" style="display: none;"> This study introduces LLM4DESIGN, a highly automated system for generating architectural and environmental design proposals. LLM4DESIGN, relying solely on site conditions and design requirements, employs Multi-Agent systems to foster creativity, Retrieval Augmented Generation (RAG) to ground designs in realism, and Visual Language Models (VLM) to synchronize all information. This system resulting in coherent, multi-illustrated, and multi-textual design schemes. The system meets the dual needs of narrative storytelling and objective drawing presentation in generating architectural and environmental design proposals. Extensive comparative and ablation experiments confirm the innovativeness of LLM4DESIGN's narrative and the grounded applicability of its plans, demonstrating its superior performance in the field of urban renewal design. Lastly, we have created the first cross-modal design scheme dataset covering architecture, landscape, interior, and urban design, providing rich resources for future research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.12025v1-abstract-full').style.display = 'none'; document.getElementById('2407.12025v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.08745">arXiv:2407.08745</a> <span> [<a href="https://arxiv.org/pdf/2407.08745">pdf</a>, <a href="https://arxiv.org/format/2407.08745">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Evolutionary Computation for the Design and Enrichment of General-Purpose Artificial Intelligence Systems: Survey and Prospects </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Poyatos%2C+J">Javier Poyatos</a>, <a href="/search/cs?searchtype=author&query=Del+Ser%2C+J">Javier Del Ser</a>, <a href="/search/cs?searchtype=author&query=Garcia%2C+S">Salvador Garcia</a>, <a href="/search/cs?searchtype=author&query=Ishibuchi%2C+H">Hisao Ishibuchi</a>, <a href="/search/cs?searchtype=author&query=Molina%2C+D">Daniel Molina</a>, <a href="/search/cs?searchtype=author&query=Triguero%2C+I">Isaac Triguero</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+B">Bing Xue</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xin Yao</a>, <a href="/search/cs?searchtype=author&query=Herrera%2C+F">Francisco Herrera</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.08745v1-abstract-short" style="display: inline;"> In Artificial Intelligence, there is an increasing demand for adaptive models capable of dealing with a diverse spectrum of learning tasks, surpassing the limitations of systems devised to cope with a single task. The recent emergence of General-Purpose Artificial Intelligence Systems (GPAIS) poses model configuration and adaptability challenges at far greater complexity scales than the optimal de… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.08745v1-abstract-full').style.display = 'inline'; document.getElementById('2407.08745v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.08745v1-abstract-full" style="display: none;"> In Artificial Intelligence, there is an increasing demand for adaptive models capable of dealing with a diverse spectrum of learning tasks, surpassing the limitations of systems devised to cope with a single task. The recent emergence of General-Purpose Artificial Intelligence Systems (GPAIS) poses model configuration and adaptability challenges at far greater complexity scales than the optimal design of traditional Machine Learning models. Evolutionary Computation (EC) has been a useful tool for both the design and optimization of Machine Learning models, endowing them with the capability to configure and/or adapt themselves to the task under consideration. Therefore, their application to GPAIS is a natural choice. This paper aims to analyze the role of EC in the field of GPAIS, exploring the use of EC for their design or enrichment. We also match GPAIS properties to Machine Learning areas in which EC has had a notable contribution, highlighting recent milestones of EC for GPAIS. Furthermore, we discuss the challenges of harnessing the benefits of EC for GPAIS, presenting different strategies to both design and improve GPAIS with EC, covering tangential areas, identifying research niches, and outlining potential research directions for EC and GPAIS. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.08745v1-abstract-full').style.display = 'none'; document.getElementById('2407.08745v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.08020">arXiv:2407.08020</a> <span> [<a href="https://arxiv.org/pdf/2407.08020">pdf</a>, <a href="https://arxiv.org/format/2407.08020">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Interactive Segmentation Model for Placenta Segmentation from 3D Ultrasound images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+H">Hao Li</a>, <a href="/search/cs?searchtype=author&query=Oguz%2C+B">Baris Oguz</a>, <a href="/search/cs?searchtype=author&query=Arenas%2C+G">Gabriel Arenas</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xing Yao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jiacheng Wang</a>, <a href="/search/cs?searchtype=author&query=Pouch%2C+A">Alison Pouch</a>, <a href="/search/cs?searchtype=author&query=Byram%2C+B">Brett Byram</a>, <a href="/search/cs?searchtype=author&query=Schwartz%2C+N">Nadav Schwartz</a>, <a href="/search/cs?searchtype=author&query=Oguz%2C+I">Ipek Oguz</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.08020v1-abstract-short" style="display: inline;"> Placenta volume measurement from 3D ultrasound images is critical for predicting pregnancy outcomes, and manual annotation is the gold standard. However, such manual annotation is expensive and time-consuming. Automated segmentation algorithms can often successfully segment the placenta, but these methods may not consistently produce robust segmentations suitable for practical use. Recently, inspi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.08020v1-abstract-full').style.display = 'inline'; document.getElementById('2407.08020v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.08020v1-abstract-full" style="display: none;"> Placenta volume measurement from 3D ultrasound images is critical for predicting pregnancy outcomes, and manual annotation is the gold standard. However, such manual annotation is expensive and time-consuming. Automated segmentation algorithms can often successfully segment the placenta, but these methods may not consistently produce robust segmentations suitable for practical use. Recently, inspired by the Segment Anything Model (SAM), deep learning-based interactive segmentation models have been widely applied in the medical imaging domain. These models produce a segmentation from visual prompts provided to indicate the target region, which may offer a feasible solution for practical use. However, none of these models are specifically designed for interactively segmenting 3D ultrasound images, which remain challenging due to the inherent noise of this modality. In this paper, we evaluate publicly available state-of-the-art 3D interactive segmentation models in contrast to a human-in-the-loop approach for the placenta segmentation task. The Dice score, normalized surface Dice, averaged symmetric surface distance, and 95-percent Hausdorff distance are used as evaluation metrics. We consider a Dice score of 0.95 a successful segmentation. Our results indicate that the human-in-the-loop segmentation model reaches this standard. Moreover, we assess the efficiency of the human-in-the-loop model as a function of the amount of prompts. Our results demonstrate that the human-in-the-loop model is both effective and efficient for interactive placenta segmentation. The code is available at \url{https://github.com/MedICL-VU/PRISM-placenta}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.08020v1-abstract-full').style.display = 'none'; document.getElementById('2407.08020v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.04493">arXiv:2407.04493</a> <span> [<a href="https://arxiv.org/pdf/2407.04493">pdf</a>, <a href="https://arxiv.org/format/2407.04493">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1007/s10994-024-06575-2">10.1007/s10994-024-06575-2 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> PROUD: PaRetO-gUided Diffusion Model for Multi-objective Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yao%2C+Y">Yinghua Yao</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+Y">Yuangang Pan</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jing Li</a>, <a href="/search/cs?searchtype=author&query=Tsang%2C+I">Ivor Tsang</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xin Yao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.04493v1-abstract-short" style="display: inline;"> Recent advancements in the realm of deep generative models focus on generating samples that satisfy multiple desired properties. However, prevalent approaches optimize these property functions independently, thus omitting the trade-offs among them. In addition, the property optimization is often improperly integrated into the generative models, resulting in an unnecessary compromise on generation… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.04493v1-abstract-full').style.display = 'inline'; document.getElementById('2407.04493v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.04493v1-abstract-full" style="display: none;"> Recent advancements in the realm of deep generative models focus on generating samples that satisfy multiple desired properties. However, prevalent approaches optimize these property functions independently, thus omitting the trade-offs among them. In addition, the property optimization is often improperly integrated into the generative models, resulting in an unnecessary compromise on generation quality (i.e., the quality of generated samples). To address these issues, we formulate a constrained optimization problem. It seeks to optimize generation quality while ensuring that generated samples reside at the Pareto front of multiple property objectives. Such a formulation enables the generation of samples that cannot be further improved simultaneously on the conflicting property functions and preserves good quality of generated samples. Building upon this formulation, we introduce the PaRetO-gUided Diffusion model (PROUD), wherein the gradients in the denoising process are dynamically adjusted to enhance generation quality while the generated samples adhere to Pareto optimality. Experimental evaluations on image generation and protein generation tasks demonstrate that our PROUD consistently maintains superior generation quality while approaching Pareto optimality across multiple property functions compared to various baselines. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.04493v1-abstract-full').style.display = 'none'; document.getElementById('2407.04493v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Machine Learning 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.03566">arXiv:2407.03566</a> <span> [<a href="https://arxiv.org/pdf/2407.03566">pdf</a>, <a href="https://arxiv.org/ps/2407.03566">ps</a>, <a href="https://arxiv.org/format/2407.03566">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Stacked Intelligent Metasurfaces for Wireless Sensing and Communication: Applications and Challenges </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+H">Hao Liu</a>, <a href="/search/cs?searchtype=author&query=An%2C+J">Jiancheng An</a>, <a href="/search/cs?searchtype=author&query=Jia%2C+X">Xing Jia</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+S">Shining Lin</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xianghao Yao</a>, <a href="/search/cs?searchtype=author&query=Gan%2C+L">Lu Gan</a>, <a href="/search/cs?searchtype=author&query=Clerckx%2C+B">Bruno Clerckx</a>, <a href="/search/cs?searchtype=author&query=Yuen%2C+C">Chau Yuen</a>, <a href="/search/cs?searchtype=author&query=Bennis%2C+M">Mehdi Bennis</a>, <a href="/search/cs?searchtype=author&query=Debbah%2C+M">M茅rouane Debbah</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.03566v1-abstract-short" style="display: inline;"> The rapid advancement of wireless communication technologies has precipitated an unprecedented demand for high data rates, extremely low latency, and ubiquitous connectivity. In order to achieve these goals, stacked intelligent metasurfaces (SIM) has been developed as a novel solution to perform advanced signal processing tasks directly in the electromagnetic wave domain, thus achieving ultra-fast… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.03566v1-abstract-full').style.display = 'inline'; document.getElementById('2407.03566v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.03566v1-abstract-full" style="display: none;"> The rapid advancement of wireless communication technologies has precipitated an unprecedented demand for high data rates, extremely low latency, and ubiquitous connectivity. In order to achieve these goals, stacked intelligent metasurfaces (SIM) has been developed as a novel solution to perform advanced signal processing tasks directly in the electromagnetic wave domain, thus achieving ultra-fast computing speed and reducing hardware complexity. This article provides an overview of the SIM technology by discussing its hardware architectures, advantages, and potential applications for wireless sensing and communication. Specifically, we explore the utilization of SIMs in enabling wave-domain beamforming, channel modeling and estimation in SIM-assisted communication systems. Furthermore, we elaborate on the potential of utilizing a SIM to build a hybrid optical-electronic neural network (HOENN) and demonstrate its efficacy by examining two case studies: disaster monitoring and direction-of-arrival estimation. Finally, we identify key implementation challenges, including practical hardware imperfections, efficient SIM configuration for realizing wave-domain signal processing, and performance analysis to motivate future research on this important and far-reaching topic. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.03566v1-abstract-full').style.display = 'none'; document.getElementById('2407.03566v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 5 figures, 1 table</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.02521">arXiv:2407.02521</a> <span> [<a href="https://arxiv.org/pdf/2407.02521">pdf</a>, <a href="https://arxiv.org/format/2407.02521">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Performance Comparison of Deep RL Algorithms for Mixed Traffic Cooperative Lane-Changing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yao%2C+X">Xue Yao</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+S">Shengren Hou</a>, <a href="/search/cs?searchtype=author&query=Hoogendoorn%2C+S+P">Serge P. Hoogendoorn</a>, <a href="/search/cs?searchtype=author&query=Calvert%2C+S+C">Simeon C. Calvert</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.02521v1-abstract-short" style="display: inline;"> Lane-changing (LC) is a challenging scenario for connected and automated vehicles (CAVs) because of the complex dynamics and high uncertainty of the traffic environment. This challenge can be handled by deep reinforcement learning (DRL) approaches, leveraging their data-driven and model-free nature. Our previous work proposed a cooperative lane-changing in mixed traffic (CLCMT) mechanism based on… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.02521v1-abstract-full').style.display = 'inline'; document.getElementById('2407.02521v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.02521v1-abstract-full" style="display: none;"> Lane-changing (LC) is a challenging scenario for connected and automated vehicles (CAVs) because of the complex dynamics and high uncertainty of the traffic environment. This challenge can be handled by deep reinforcement learning (DRL) approaches, leveraging their data-driven and model-free nature. Our previous work proposed a cooperative lane-changing in mixed traffic (CLCMT) mechanism based on TD3 to facilitate an optimal lane-changing strategy. This study enhances the current CLCMT mechanism by considering both the uncertainty of the human-driven vehicles (HVs) and the microscopic interactions between HVs and CAVs. The state-of-the-art (SOTA) DRL algorithms including DDPG, TD3, SAC, and PPO are utilized to deal with the formulated MDP with continuous actions. Performance comparison among the four DRL algorithms demonstrates that DDPG, TD3, and PPO algorithms can deal with uncertainty in traffic environments and learn well-performed LC strategies in terms of safety, efficiency, comfort, and ecology. The PPO algorithm outperforms the other three algorithms, regarding a higher reward, fewer exploration mistakes and crashes, and a more comfortable and ecology LC strategy. The improvements promise CLCMT mechanism greater advantages in the LC motion planning of CAVs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.02521v1-abstract-full').style.display = 'none'; document.getElementById('2407.02521v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 5 figures, IEEE conference</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Yao%2C+X&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Yao%2C+X&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Yao%2C+X&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Yao%2C+X&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Yao%2C+X&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Yao%2C+X&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li> <a href="/search/?searchtype=author&query=Yao%2C+X&start=250" class="pagination-link " aria-label="Page 6" aria-current="page">6 </a> </li> <li> <a href="/search/?searchtype=author&query=Yao%2C+X&start=300" class="pagination-link " aria-label="Page 7" aria-current="page">7 </a> </li> </ul> </nav> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository