Search | arXiv e-print repository

Showing 1–50 of 1,489 results for author: Liu, M
Searching in archive cs. Results sorted by announcement date (newest first), 50 per page.
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Liu%2C+M&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Liu%2C+M&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Liu%2C+M&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Liu%2C+M&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Liu%2C+M&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Liu%2C+M&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.14125">arXiv:2411.14125</a> <span> [<a href="https://arxiv.org/pdf/2411.14125">pdf</a>, <a href="https://arxiv.org/format/2411.14125">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> RestorerID: Towards Tuning-Free Face Restoration with ID Preservation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ying%2C+J">Jiacheng Ying</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+M">Mushui Liu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z">Zhe Wu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Runming Zhang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+Z">Zhu Yu</a>, <a href="/search/cs?searchtype=author&query=Fu%2C+S">Siming Fu</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+S">Si-Yuan Cao</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+C">Chao Wu</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+Y">Yunlong Yu</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+H">Hui-Liang Shen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.14125v1-abstract-short" style="display: inline;"> Blind face restoration has made great progress in producing high-quality and lifelike images. Yet it remains challenging to preserve the ID information especially when the degradation is heavy. 
Current reference-guided face restoration approaches either require face alignment or personalized test-tuning, which are unfaithful or time-consuming. In this paper, we propose a tuning-free method named R… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14125v1-abstract-full').style.display = 'inline'; document.getElementById('2411.14125v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.14125v1-abstract-full" style="display: none;"> Blind face restoration has made great progress in producing high-quality and lifelike images. Yet it remains challenging to preserve the ID information especially when the degradation is heavy. Current reference-guided face restoration approaches either require face alignment or personalized test-tuning, which are unfaithful or time-consuming. In this paper, we propose a tuning-free method named RestorerID that incorporates ID preservation during face restoration. RestorerID is a diffusion model-based method that restores low-quality images with varying levels of degradation by using a single reference image. To achieve this, we propose a unified framework to combine the ID injection with the base blind face restoration model. In addition, we design a novel Face ID Rebalancing Adapter (FIR-Adapter) to tackle the problems of content unconsistency and contours misalignment that are caused by information conflicts between the low-quality input and reference image. Furthermore, by employing an Adaptive ID-Scale Adjusting strategy, RestorerID can produce superior restored images across various levels of degradation. Experimental results on the Celeb-Ref dataset and real-world scenarios demonstrate that RestorerID effectively delivers high-quality face restoration with ID preservation, achieving a superior performance compared to the test-tuning approaches and other reference-guided ones. The code of RestorerID is available at \url{https://github.com/YingJiacheng/RestorerID}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14125v1-abstract-full').style.display = 'none'; document.getElementById('2411.14125v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 10 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.14046">arXiv:2411.14046</a> <span> [<a href="https://arxiv.org/pdf/2411.14046">pdf</a>, <a href="https://arxiv.org/format/2411.14046">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> REFOL: Resource-Efficient Federated Online Learning for Traffic Flow Forecasting </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+Q">Qingxiang Liu</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+S">Sheng Sun</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+Y">Yuxuan Liang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+X">Xiaolong Xu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+M">Min Liu</a>, <a href="/search/cs?searchtype=author&query=Bilal%2C+M">Muhammad Bilal</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yuwei Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xujing Li</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+Y">Yu Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.14046v1-abstract-short" style="display: inline;"> Multiple federated learning (FL) methods are proposed for traffic flow forecasting (TFF) to avoid heavy-transmission and privacy-leaking concerns resulting from the disclosure of raw data in centralized methods. However, these FL methods adopt offline learning which may yield subpar performance, when concept drift occurs, i.e., distributions of historical and future data vary. Online learning can… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.14046v1-abstract-full').style.display = 'inline'; document.getElementById('2411.14046v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.14046v1-abstract-full" style="display: none;"> Multiple federated learning (FL) methods are proposed for traffic flow forecasting (TFF) to avoid heavy-transmission and privacy-leaking concerns resulting from the disclosure of raw data in centralized methods. However, these FL methods adopt offline learning which may yield subpar performance, when concept drift occurs, i.e., distributions of historical and future data vary. Online learning can detect concept drift during model training, thus more applicable to TFF. Nevertheless, the existing federated online learning method for TFF fails to efficiently solve the concept drift problem and causes tremendous computing and communication overhead. Therefore, we propose a novel method named Resource-Efficient Federated Online Learning (REFOL) for TFF, which guarantees prediction performance in a communication-lightweight and computation-efficient way. Specifically, we design a data-driven client participation mechanism to detect the occurrence of concept drift and determine clients' participation necessity. 
   Subsequently, we propose an adaptive online optimization strategy, which guarantees prediction performance and meanwhile avoids meaningless model updates. Then, a graph convolution-based model aggregation mechanism is designed to assess participants' contributions based on spatial correlation without imposing extra communication and computing overhead on clients. Finally, we conduct extensive experiments on real-world datasets to demonstrate the superiority of REFOL in terms of prediction improvement and resource economization.
   Submitted 21 November, 2024; originally announced November 2024.

3. arXiv:2411.13990 [pdf, other] cs.SE (Software Engineering)
   Repository-level Code Translation Benchmark Targeting Rust
   Authors: Guangsheng Ou, Mingwei Liu, Yuxuan Chen, Xing Peng, Zibin Zheng
   Abstract: Recent advances in large language models (LLMs) have shown significant capabilities in code translation, often evaluated using benchmarks like CodeTransOcean. However, these evaluations typically focus on simple, function-level translations without considering dependencies, which does not reflect the complexities of real-world software development. Further, their effectiveness in translating to newer, lower-resource languages like Rust in realistic scenarios is still under-explored.
   To address this gap, we introduce the first repository-level code translation benchmark, comprising 375 tasks targeting Rust, complete with relevant dependencies. Using this benchmark, we study four state-of-the-art LLMs, analyzing their erroneous outputs to understand their performance in more complex translation scenarios. Our findings reveal that LLMs exhibit substantially worse performance on repository-level translations than on simpler tasks (a 41.5%–56.2% Pass@1 drop for GPT-4), highlighting limitations in existing evaluation methods. The best-performing model is Claude-3.5, demonstrating the strongest translation capabilities in both basic functionality accuracy and several relevant additional abilities. Additionally, we discover that LLMs struggle with identifying language differences in complex tasks, and that increased dependencies correlate with greater translation difficulty.
   Submitted 21 November, 2024; originally announced November 2024.

4. arXiv:2411.12824 [pdf, other] cs.LG (Machine Learning)
   Generalized Prompt Tuning: Adapting Frozen Univariate Time Series Foundation Models for Multivariate Healthcare Time Series
   Authors: Mingzhu Liu, Angela H. Chen, George H. Chen
   Abstract: Time series foundation models are pre-trained on large datasets and are able to achieve state-of-the-art performance in diverse tasks. However, to date, there has been limited work demonstrating how well these models perform in medical applications, where labeled data can be scarce.
   Further, we observe that currently the majority of time series foundation models either are univariate in nature or assume channel independence, meaning that they handle multivariate time series but do not model how the different variables relate. In this paper, we propose a prompt-tuning-inspired fine-tuning technique, Generalized Prompt Tuning (Gen-P-Tuning), that enables us to adapt an existing univariate time series foundation model (treated as frozen) to handle multivariate time series prediction. Our approach provides a way to combine information across the channels (variables) of multivariate time series. We demonstrate the effectiveness of our fine-tuning approach against various baselines on two MIMIC classification tasks and on influenza-like illness forecasting.
   Submitted 19 November, 2024; originally announced November 2024.
   Comments: Machine Learning for Health (ML4H 2024)

5. arXiv:2411.12405 [pdf, other] cs.CL (Computation and Language); cs.AI (Artificial Intelligence); cs.HC (Human-Computer Interaction)
   Evaluating the Prompt Steerability of Large Language Models
   Authors: Erik Miehling, Michael Desmond, Karthikeyan Natesan Ramamurthy, Elizabeth M. Daly, Pierre Dognin, Jesus Rios, Djallel Bouneffouf, Miao Liu
   Abstract: Building pluralistic AI requires designing models that are able to be shaped to represent a wide range of value systems and cultures. Achieving this requires first being able to evaluate the degree to which a given model is capable of reflecting various personas. To this end, we propose a benchmark for evaluating the steerability of model personas as a function of prompting.
   Our design is based on a formal definition of prompt steerability, which analyzes the degree to which a model's joint behavioral distribution can be shifted from its baseline behavior. By defining steerability indices and inspecting how these indices change as a function of steering effort, we can estimate the steerability of a model across various persona dimensions and directions. Our benchmark reveals that the steerability of many current models is limited, due to both a skew in their baseline behavior and an asymmetry in their steerability across many persona dimensions. We release an implementation of our benchmark at https://github.com/IBM/prompt-steering.
   Submitted 19 November, 2024; originally announced November 2024.

6. arXiv:2411.12078 [pdf, other] cs.LG (Machine Learning)
   Molecule Generation with Fragment Retrieval Augmentation
   Authors: Seul Lee, Karsten Kreis, Srimukh Prasad Veccham, Meng Liu, Danny Reidenbach, Saee Paliwal, Arash Vahdat, Weili Nie
   Abstract: Fragment-based drug discovery, in which molecular fragments are assembled into new molecules with desirable biochemical properties, has achieved great success.
   However, many fragment-based molecule generation methods show limited exploration beyond the existing fragments in the database, as they only reassemble or slightly modify the given ones. To tackle this problem, we propose a new fragment-based molecule generation framework with retrieval augmentation, namely Fragment Retrieval-Augmented Generation (f-RAG). f-RAG is based on a pre-trained molecular generative model that proposes additional fragments from input fragments to complete and generate a new molecule. Given a fragment vocabulary, f-RAG retrieves two types of fragments: (1) hard fragments, which serve as building blocks that will be explicitly included in the newly generated molecule, and (2) soft fragments, which serve as references to guide the generation of new fragments through a trainable fragment injection module. To extrapolate beyond the existing fragments, f-RAG updates the fragment vocabulary with generated fragments via an iterative refinement process, which is further enhanced with post-hoc genetic fragment modification. f-RAG can achieve an improved exploration-exploitation trade-off by maintaining a pool of fragments and expanding it with novel, high-quality fragments through a strong generative prior.
   Submitted 18 November, 2024; originally announced November 2024.
   Comments: NeurIPS 2024

7. arXiv:2411.10548 [pdf, ps, other] cs.LG (Machine Learning); q-bio.BM (Biomolecules)
   BioNeMo Framework: a modular, high-performance library for AI model development in drug discovery
   Authors: Peter St. John, Dejun Lin, Polina Binder, Malcolm Greaves, Vega Shah, John St. John, Adrian Lange, Patrick Hsu, Rajesh Illango, Arvind Ramanathan, Anima Anandkumar, David H Brookes, Akosua Busia, Abhishaike Mahajan, Stephen Malina, Neha Prasad, Sam Sinai, Lindsay Edwards, Thomas Gaudelet, Cristian Regep, Martin Steinegger, Burkhard Rost, Alexander Brace, Kyle Hippe, Luca Naef, et al. (63 additional authors not shown)
   Abstract: Artificial Intelligence models encoding biology and chemistry are opening new routes to high-throughput and high-quality in-silico drug development. However, their training increasingly relies on computational scale, with recent protein language models (pLMs) trained on hundreds of graphics processing units (GPUs). We introduce the BioNeMo Framework to facilitate the training of computational biology and chemistry AI models across hundreds of GPUs. Its modular design allows the integration of individual components, such as data loaders, into existing workflows, and it is open to community contributions.
   We detail technical features of the BioNeMo Framework through use cases such as pLM pre-training and fine-tuning. On 256 NVIDIA A100s, the BioNeMo Framework trains a three-billion-parameter BERT-based pLM on over one trillion tokens in 4.2 days. The BioNeMo Framework is open-source and free for everyone to use.
   Submitted 15 November, 2024; originally announced November 2024.

8. arXiv:2411.09422 [pdf, other] cs.AI (Artificial Intelligence)
   OpenLS-DGF: An Adaptive Open-Source Dataset Generation Framework for Machine Learning Tasks in Logic Synthesis
   Authors: Liwei Ni, Rui Wang, Miao Liu, Xingyu Meng, Xiaoze Lin, Junfeng Liu, Guojie Luo, Zhufei Chu, Weikang Qian, Xiaoyan Yang, Biwei Xie, Xingquan Li, Huawei Li
   Abstract: This paper introduces OpenLS-DGF, an adaptive logic synthesis dataset generation framework, to enhance machine learning (ML) applications within the logic synthesis process.
   Previous dataset generation flows were tailored to specific tasks or lacked integrated machine learning capabilities. In contrast, OpenLS-DGF supports various machine learning tasks by encapsulating the three fundamental steps of logic synthesis: Boolean representation, logic optimization, and technology mapping. It preserves the original information in both Verilog and machine-learning-friendly GraphML formats. The Verilog files offer semi-customizable capabilities, enabling researchers to insert additional steps and incrementally refine the generated dataset. Furthermore, OpenLS-DGF includes an adaptive circuit engine that facilitates final dataset management and downstream tasks. The generated OpenLS-D-v1 dataset comprises 46 combinational designs from established benchmarks, totaling over 966,000 Boolean circuits. OpenLS-D-v1 supports the integration of new data features, making it more versatile for new challenges. This paper demonstrates the versatility of OpenLS-D-v1 through four distinct downstream tasks: circuit classification, circuit ranking, quality-of-results (QoR) prediction, and probability prediction. Each task is chosen to represent an essential step of logic synthesis, and the experimental results show that the dataset generated by OpenLS-DGF achieves prominent diversity and applicability. The source code and datasets are available at https://github.com/Logic-Factory/ACE/blob/master/OpenLS-DGF/readme.md.
   Submitted 16 November, 2024; v1 submitted 14 November, 2024; originally announced November 2024.
   Comments: 14 pages

9. arXiv:2411.09360 [pdf, other] cs.RO (Robotics)
   D4W: Dependable Data-Driven Dynamics for Wheeled Robots
   Authors: Yunfeng Lin, Minghuan Liu, Yong Yu
   Abstract: Wheeled robots have gained significant attention due to their wide range of applications in manufacturing, logistics, and service industries.
   However, due to the difficulty of building a highly accurate dynamics model for wheeled robots, developing and testing control algorithms for them remains challenging and time-consuming, requiring extensive physical experimentation. To address this problem, we propose D4W, i.e., Dependable Data-Driven Dynamics for Wheeled Robots, a simulation framework incorporating data-driven methods to accelerate the development and evaluation of algorithms for wheeled robots. The key contribution of D4W is a solution that utilizes real-world sensor data to learn accurate models of robot dynamics. The learned dynamics can capture complex robot behaviors and interactions with the environment throughout simulations, surpassing the limitations of analytical methods, which only work in simplified scenarios. Experimental results show that D4W achieves the best simulation accuracy compared to traditional approaches, allowing for rapid iteration of wheeled robot algorithms with little or no need for fine-tuning in reality. We further verify the usability and practicality of the proposed framework through integration with existing simulators and controllers.
   Submitted 14 November, 2024; originally announced November 2024.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The Fifth International Conference on Distributed Artificial Intelligence</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.07871">arXiv:2411.07871</a> <span> [<a href="https://arxiv.org/pdf/2411.07871">pdf</a>, <a href="https://arxiv.org/format/2411.07871">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> Leveraging Multimodal Models for Enhanced Neuroimaging Diagnostics in Alzheimer's Disease </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chiumento%2C+F">Francesco Chiumento</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+M">Mingming Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.07871v1-abstract-short" style="display: inline;"> The rapid advancements in Large Language Models (LLMs) and Vision-Language Models (VLMs) have shown great potential in medical diagnostics, particularly in radiology, where datasets such as X-rays are paired with human-generated diagnostic reports. However, a significant research gap exists in the neuroimaging field, especially for conditions such as Alzheimer's disease, due to the lack of compreh… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07871v1-abstract-full').style.display = 'inline'; document.getElementById('2411.07871v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.07871v1-abstract-full" style="display: none;"> The rapid advancements in Large Language Models (LLMs) and Vision-Language Models (VLMs) have shown great potential in medical diagnostics, particularly in radiology, where datasets such as X-rays are paired with human-generated diagnostic reports. However, a significant research gap exists in the neuroimaging field, especially for conditions such as Alzheimer's disease, due to the lack of comprehensive diagnostic reports that can be utilized for model fine-tuning. This paper addresses this gap by generating synthetic diagnostic reports using GPT-4o-mini on structured data from the OASIS-4 dataset, which comprises 663 patients. Using the synthetic reports as ground truth for training and validation, we then generated neurological reports directly from the images in the dataset leveraging the pre-trained BiomedCLIP and T5 models. Our proposed method achieved a BLEU-4 score of 0.1827, ROUGE-L score of 0.3719, and METEOR score of 0.4163, revealing its potential in generating clinically relevant and accurate diagnostic reports. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07871v1-abstract-full').style.display = 'none'; document.getElementById('2411.07871v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The paper has been accepted by the conference: "2024 International Conference on Big Data (IEEE Big Data 2024)"</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.07632">arXiv:2411.07632</a> <span> [<a href="https://arxiv.org/pdf/2411.07632">pdf</a>, <a href="https://arxiv.org/format/2411.07632">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> </div> </div> <p class="title is-5 mathjax"> RPCAcc: A High-Performance and Reconfigurable PCIe-attached RPC Accelerator </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jie Zhang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+H">Hongjing Huang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+X">Xuzheng Xu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiang Li</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+J">Jieru Zhao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+M">Ming Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zeke Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.07632v2-abstract-short" style="display: inline;"> The emerging microservice/serverless-based cloud programming paradigm and the rising networking speeds leave the RPC stack as the predominant data center tax. Domain-specific hardware acceleration holds the potential to disentangle the overhead and save host CPU cycles. However, state-of-the-art RPC accelerators integrate RPC logic into the CPU or use specialized low-latency interconnects, hardly… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.07632v2-abstract-full').style.display = 'inline'; document.getElementById('2411.07632v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.07632v2-abstract-full" style="display: none;"> The emerging microservice/serverless-based cloud programming paradigm and the rising networking speeds leave the RPC stack as the predominant data center tax. Domain-specific hardware acceleration holds the potential to disentangle the overhead and save host CPU cycles. However, state-of-the-art RPC accelerators integrate RPC logic into the CPU or use specialized low-latency interconnects, hardly adopted in commodity servers. To this end, we design and implement RPCAcc, a software-hardware co-designed RPC on-NIC accelerator that enables reconfigurable RPC kernel offloading. RPCAcc connects to the server through the most widely used PCIe interconnect. 
    To grapple with the ramifications of PCIe-induced challenges, RPCAcc introduces three techniques: (a) a target-aware deserializer that effectively batches cross-PCIe writes in the accelerator's on-chip memory using compacted hardware data structures; (b) a memory-affinity CPU-accelerator collaborative serializer, which trades additional host memory copies for slow cross-PCIe transfers; and (c) an automatic field-update technique that transparently codifies the schema based on dynamically reconfigured RPC kernels to minimize superfluous PCIe traversals. We prototype RPCAcc using the Xilinx U280 FPGA card. On HyperProtoBench, RPCAcc achieves 3.2X lower serialization time than a comparable RPC accelerator baseline and demonstrates up to 2.6X throughput improvement in the end-to-end cloud workload.
    Submitted 18 November, 2024; v1 submitted 12 November, 2024; originally announced November 2024.

12. arXiv:2411.07504 [pdf, other] cs.IR (Information Retrieval); cs.LG (Machine Learning)
    AdaS&S: a One-Shot Supernet Approach for Automatic Embedding Size Search in Deep Recommender System
    Authors: He Wei, Yuekui Yang, Yang Zhang, Haiyang Wu, Meixi Liu, Shaoping Ma
    Abstract: Deep Learning Recommendation Models (DLRMs) utilize the embedding layer to represent various categorical features. Traditional DLRMs adopt a unified embedding size for all features, leading to suboptimal performance and redundant parameters. Thus, many Automatic Embedding size Search (AES) works focus on obtaining mixed embedding sizes with strong model performance.
arXiv:2411.07504 [pdf, other] cs.IR (Information Retrieval); cs.LG (Machine Learning)
Title: AdaS&S: a One-Shot Supernet Approach for Automatic Embedding Size Search in Deep Recommender System
Authors: He Wei, Yuekui Yang, Yang Zhang, Haiyang Wu, Meixi Liu, Shaoping Ma
Abstract: Deep Learning Recommendation Models (DLRMs) utilize the embedding layer to represent various categorical features. Traditional DLRMs adopt a unified embedding size for all features, leading to suboptimal performance and redundant parameters. Thus, many Automatic Embedding size Search (AES) works focus on obtaining mixed embedding sizes with strong model performance. However, previous AES works can hardly address several challenges together: (1) the search results of embedding sizes are unstable; (2) the recommendation effect with AES results is unsatisfactory; (3) the memory cost of embeddings is uncontrollable. To address these challenges, we propose a novel one-shot AES framework called AdaS&S, in which a supernet encompassing various candidate embeddings is built and AES is performed as a search for network architectures within it. Our framework contains two main stages: in the first stage, we decouple training parameters from searching embedding sizes and propose the Adaptive Sampling method to yield a well-trained supernet, which further helps to produce stable AES results. In the second stage, to obtain embedding sizes that benefit the model effect, we design a reinforcement learning search process that utilizes the supernet trained previously. Meanwhile, to adapt the search to a specific resource constraint, we introduce a resource competition penalty to balance model effectiveness against the memory cost of embeddings. We conduct extensive experiments on public datasets to show the superiority of AdaS&S. Our method improves AUC by about 0.3% while saving about 20% of model parameters. Empirical analysis also shows that the stability of search results in AdaS&S significantly exceeds other methods.
Submitted 11 November, 2024; originally announced November 2024.
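A minimal sketch of what a resource competition penalty can look like inside the RL reward, assuming a simple over-budget hinge penalty; the paper's exact formulation is not reproduced here, and all names and constants are illustrative:

```python
# Hypothetical flavor of AdaS&S's resource competition penalty: candidate
# embedding sizes compete for a fixed memory budget, and the search reward
# trades validation quality against the overrun. Functional form is assumed.

def embedding_memory(sizes, vocab_sizes, bytes_per_param=4):
    """Total bytes for per-feature embedding tables of the chosen sizes."""
    return sum(d * v * bytes_per_param for d, v in zip(sizes, vocab_sizes))

def reward(val_auc, sizes, vocab_sizes, budget_bytes, lam=1e-9):
    overrun = max(0.0, embedding_memory(sizes, vocab_sizes) - budget_bytes)
    return val_auc - lam * overrun   # penalized only when over budget

# Example: two candidate size assignments for three categorical features.
vocab = [1_000_000, 50_000, 200]
print(reward(0.801, [64, 32, 8], vocab, budget_bytes=2e8))
print(reward(0.803, [128, 64, 16], vocab, budget_bytes=2e8))
```

The second candidate has slightly higher raw AUC but a much larger memory overrun, so the penalized reward prefers the first; tuning `lam` moves the balance point.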
arXiv:2411.07206 [pdf, other] cs.HC (Human-Computer Interaction)
Title: Tasks, Time, and Tools: Quantifying Online Sensemaking Efforts Through a Survey-based Study
Authors: Andrew Kuznetsov, Michael Xieyang Liu, Aniket Kittur
Abstract: Aiming to help people conduct online research tasks, much research has gone into tools for searching for, collecting, organizing, and synthesizing online information. However, outside of the lab, in-the-wild sensemaking sessions (with data on tasks, users, their tools, and challenges) can ground us in the reality of such efforts and the state of tool support. We use a survey-based approach with aided recall, focused on segmenting and contextualizing individual exploratory browsing sessions, to conduct a mixed-methods analysis of everyday sensemaking sessions in the traditional desktop-browser setting while preserving user privacy. We report data from our survey (n=111) collected in September 2022, and use these results to update and deepen the rich literature on information-seeking behavior and exploratory search, contributing new empirical insights into the time spent per week, the distribution of that time across tasks, and the lack of externalization and tool use despite widespread desire for support.
Submitted 11 November, 2024; originally announced November 2024.
arXiv:2411.07135 [pdf, other] cs.CV (Computer Vision and Pattern Recognition); cs.AI (Artificial Intelligence); cs.GR (Graphics)
Title: Edify 3D: Scalable High-Quality 3D Asset Generation
Authors: NVIDIA: Maciej Bala, Yin Cui, Yifan Ding, Yunhao Ge, Zekun Hao, Jon Hasselgren, Jacob Huffman, Jingyi Jin, J. P. Lewis, Zhaoshuo Li, Chen-Hsuan Lin, Yen-Chen Lin, Tsung-Yi Lin, Ming-Yu Liu, Alice Luo, Qianli Ma, Jacob Munkberg, Stella Shi, Fangyin Wei, Donglai Xiang, Jiashu Xu, Xiaohui Zeng, Qinsheng Zhang
Abstract: We introduce Edify 3D, an advanced solution designed for high-quality 3D asset generation. Our method first synthesizes RGB and surface normal images of the described object at multiple viewpoints using a diffusion model. The multi-view observations are then used to reconstruct the shape, texture, and PBR materials of the object. Our method can generate high-quality 3D assets with detailed geometry, clean shape topologies, high-resolution textures, and materials within 2 minutes of runtime.
Submitted 11 November, 2024; originally announced November 2024.
Comments: Project website: https://research.nvidia.com/labs/dir/edify-3d
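A structural sketch of the two-stage recipe the abstract describes; every name below is a placeholder interface for illustration, not NVIDIA's API, and the stubs are not executed against real models:

```python
# Stage 1: a diffusion model samples per-view RGB + surface-normal images.
# Stage 2: a reconstructor fits shape, texture, and PBR materials from them.
# `diffusion` and `reconstructor` are assumed duck-typed interfaces.

def generate_asset(prompt, diffusion, reconstructor, n_views=8):
    # Stage 1: multi-view synthesis (one RGB/normal pair per viewpoint).
    views = [diffusion.sample(prompt, viewpoint=v) for v in range(n_views)]
    # Stage 2: multi-view reconstruction of geometry and appearance.
    mesh = reconstructor.fit_shape(views)
    return {
        "mesh": mesh,
        "texture": reconstructor.bake_texture(mesh, views),
        "materials": reconstructor.estimate_materials(mesh, views),
    }
```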
arXiv:2411.07126 [pdf, other] cs.CV (Computer Vision and Pattern Recognition); cs.LG (Machine Learning)
Title: Edify Image: High-Quality Image Generation with Pixel Space Laplacian Diffusion Models
Authors: NVIDIA: Yuval Atzmon, Maciej Bala, Yogesh Balaji, Tiffany Cai, Yin Cui, Jiaojiao Fan, Yunhao Ge, Siddharth Gururani, Jacob Huffman, Ronald Isaac, Pooya Jannaty, Tero Karras, Grace Lam, J. P. Lewis, Aaron Licata, Yen-Chen Lin, Ming-Yu Liu, Qianli Ma, Arun Mallya, Ashlee Martino-Tarr, Doug Mendez, Seungjun Nah, Chris Pruett, et al. (7 additional authors not shown)
Abstract: We introduce Edify Image, a family of diffusion models capable of generating photorealistic image content with pixel-perfect accuracy. Edify Image utilizes cascaded pixel-space diffusion models trained using a novel Laplacian diffusion process, in which image signals at different frequency bands are attenuated at varying rates. Edify Image supports a wide range of applications, including text-to-image synthesis, 4K upsampling, ControlNets, 360 HDR panorama generation, and finetuning for image customization.
Submitted 11 November, 2024; originally announced November 2024.
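A toy NumPy illustration of the core idea behind a band-wise attenuation process: decompose an image into Laplacian-pyramid frequency bands and decay each band at its own rate. The schedule and rates below are invented for illustration; the paper's actual Laplacian diffusion process will differ:

```python
# Laplacian-pyramid decomposition plus per-band exponential attenuation,
# mimicking "image signals at different frequency bands are attenuated at
# varying rates". Rates and the decay law are assumptions.

import numpy as np

def downsample(x):  # 2x box downsample
    return x.reshape(x.shape[0] // 2, 2, x.shape[1] // 2, 2).mean(axis=(1, 3))

def upsample(x):    # nearest-neighbor 2x upsample
    return x.repeat(2, axis=0).repeat(2, axis=1)

def laplacian_pyramid(img, levels=3):
    bands, cur = [], img
    for _ in range(levels):
        low = downsample(cur)
        bands.append(cur - upsample(low))   # high-frequency residual
        cur = low
    bands.append(cur)                       # coarsest low-pass band
    return bands

def attenuate(bands, t, rates=(0.9, 0.6, 0.3, 0.1)):
    # Higher-frequency bands decay faster as diffusion time t in [0, 1] grows.
    return [b * np.exp(-r * t * 10.0) for b, r in zip(bands, rates)]

def reconstruct(bands):
    cur = bands[-1]
    for b in reversed(bands[:-1]):
        cur = upsample(cur) + b
    return cur

img = np.random.rand(64, 64)
noisy_bands = attenuate(laplacian_pyramid(img), t=0.5)
print(reconstruct(noisy_bands).shape)  # (64, 64)
```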
arXiv:2411.06284 [pdf, other] cs.AI (Artificial Intelligence)
Title: A Comprehensive Survey and Guide to Multimodal Large Language Models in Vision-Language Tasks
Authors: Chia Xin Liang, Pu Tian, Caitlyn Heqi Yin, Yao Yua, Wei An-Hou, Li Ming, Tianyang Wang, Ziqian Bi, Ming Liu
Abstract: This survey and application guide to multimodal large language models (MLLMs) explores the rapidly developing field of MLLMs, examining their architectures, applications, and impact on AI and generative models. Starting with foundational concepts, we delve into how MLLMs integrate various data types, including text, images, video, and audio, to enable complex AI systems for cross-modal understanding and generation. It covers essential topics such as training methods, architectural components, and practical applications in various fields, from visual storytelling to enhanced accessibility. Through detailed case studies and technical analysis, the text examines prominent MLLM implementations while addressing key challenges in scalability, robustness, and cross-modal learning. Concluding with a discussion of ethical considerations, responsible AI development, and future directions, this authoritative resource provides both theoretical frameworks and practical insights. It offers a balanced perspective on the opportunities and challenges in the development and deployment of MLLMs, and is highly valuable for researchers, practitioners, and students interested in the intersection of natural language processing and computer vision.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06284v1-abstract-full').style.display = 'none'; document.getElementById('2411.06284v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.05826">arXiv:2411.05826</a> <span> [<a href="https://arxiv.org/pdf/2411.05826">pdf</a>, <a href="https://arxiv.org/ps/2411.05826">ps</a>, <a href="https://arxiv.org/format/2411.05826">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> From Pixels to Prose: Advancing Multi-Modal Language Models for Remote Sensing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sun%2C+X">Xintian Sun</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+B">Benji Peng</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+C">Charles Zhang</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+F">Fei Jin</a>, <a href="/search/cs?searchtype=author&query=Niu%2C+Q">Qian Niu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Junyu Liu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Keyu Chen</a>, <a href="/search/cs?searchtype=author&query=Li%2C+M">Ming Li</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+P">Pohsun Feng</a>, <a href="/search/cs?searchtype=author&query=Bi%2C+Z">Ziqian Bi</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+M">Ming Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yichao Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.05826v1-abstract-short" style="display: inline;"> Remote sensing has evolved from simple image acquisition to complex systems capable of integrating and processing visual and textual data. This review examines the development and application of multi-modal language models (MLLMs) in remote sensing, focusing on their ability to interpret and describe satellite imagery using natural language. We cover the technical underpinnings of MLLMs, including… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05826v1-abstract-full').style.display = 'inline'; document.getElementById('2411.05826v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.05826v1-abstract-full" style="display: none;"> Remote sensing has evolved from simple image acquisition to complex systems capable of integrating and processing visual and textual data. This review examines the development and application of multi-modal language models (MLLMs) in remote sensing, focusing on their ability to interpret and describe satellite imagery using natural language. 
We cover the technical underpinnings of MLLMs, including dual-encoder architectures, Transformer models, self-supervised and contrastive learning, and cross-modal integration. The unique challenges of remote sensing data--varying spatial resolutions, spectral richness, and temporal changes--are analyzed for their impact on MLLM performance. Key applications such as scene description, object detection, change detection, text-to-image retrieval, image-to-text generation, and visual question answering are discussed to demonstrate their relevance in environmental monitoring, urban planning, and disaster response. We review significant datasets and resources supporting the training and evaluation of these models. Challenges related to computational demands, scalability, data quality, and domain adaptation are highlighted. We conclude by proposing future research directions and technological advancements to further enhance MLLM utility in remote sensing. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05826v1-abstract-full').style.display = 'none'; document.getElementById('2411.05826v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 1 figure</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.05825">arXiv:2411.05825</a> <span> [<a href="https://arxiv.org/pdf/2411.05825">pdf</a>, <a href="https://arxiv.org/format/2411.05825">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Neurons and Cognition">q-bio.NC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> SurfGNN: A robust surface-based prediction model with interpretability for coactivation maps of spatial and cortical features </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhuoshuo Li</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jiong Zhang</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+Y">Youbing Zeng</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+J">Jiaying Lin</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+D">Dan Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jianjia Zhang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+D">Duan Xu</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+H">Hosung Kim</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+B">Bingguang Liu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+M">Mengting Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.05825v1-abstract-short" style="display: inline;"> Current brain surface-based prediction models often overlook the variability of regional attributes at 
Abstract: Current brain surface-based prediction models often overlook the variability of regional attributes at the cortical feature level. While graph neural networks (GNNs) excel at capturing regional differences, they encounter challenges when dealing with complex, high-density graph structures. In this work, we consider the cortical surface mesh as a sparse graph and propose an interpretable prediction model, the Surface Graph Neural Network (SurfGNN). SurfGNN employs topology-sampling learning (TSL) and region-specific learning (RSL) structures to manage individual cortical features at both lower and higher scales of the surface mesh, effectively tackling the challenges posed by the overly abundant mesh nodes and addressing the heterogeneity of cortical regions. Building on this, a novel score-weighted fusion (SWF) method is implemented to merge nodal representations associated with each cortical feature for prediction. We apply our model to a neonatal brain age prediction task using a dataset of harmonized MR images from 481 subjects (503 scans). SurfGNN outperforms all existing state-of-the-art methods, demonstrating an improvement of at least 9.0% and achieving a mean absolute error (MAE) of 0.827±0.056 postmenstrual weeks. Furthermore, it generates feature-level activation maps, indicating its capability to identify robust regional variations in different morphometric contributions for prediction.
Submitted 5 November, 2024; originally announced November 2024.
Comments: 15 pages, 6 figures
ACM Class: J.3
arXiv:2411.05036 [pdf, ps, other] cs.CL (Computation and Language)
Title: From Word Vectors to Multimodal Embeddings: Techniques, Applications, and Future Directions For Large Language Models
Authors: Charles Zhang, Benji Peng, Xintian Sun, Qian Niu, Junyu Liu, Keyu Chen, Ming Li, Pohsun Feng, Ziqian Bi, Ming Liu, Yichao Zhang, Cheng Fei, Caitlyn Heqi Yin, Lawrence KQ Yan, Tianyang Wang
Abstract: Word embeddings and language models have transformed natural language processing (NLP) by facilitating the representation of linguistic elements in continuous vector spaces. This review visits foundational concepts such as the distributional hypothesis and contextual similarity, tracing the evolution from sparse representations like one-hot encoding to dense embeddings including Word2Vec, GloVe, and fastText. We examine both static and contextualized embeddings, underscoring advancements in models such as ELMo, BERT, and GPT and their adaptations for cross-lingual and personalized applications. The discussion extends to sentence and document embeddings, covering aggregation methods and generative topic models, along with the application of embeddings in multimodal domains, including vision, robotics, and cognitive science. Advanced topics such as model compression, interpretability, numerical encoding, and bias mitigation are analyzed, addressing both technical challenges and ethical implications. Additionally, we identify future research directions, emphasizing the need for scalable training techniques, enhanced interpretability, and robust grounding in non-textual modalities. By synthesizing current methodologies and emerging trends, this survey offers researchers and practitioners an in-depth resource to push the boundaries of embedding-based language models.
Submitted 6 November, 2024; originally announced November 2024.
Comments: 21 pages
href="/search/cs?searchtype=author&query=Niu%2C+Q">Qian Niu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+S">Silin Chen</a>, <a href="/search/cs?searchtype=author&query=Hsieh%2C+W">Weiche Hsieh</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+L+K+Q">Lawrence K. Q. Yan</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+C+X">Chia Xin Liang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+H">Han Xu</a>, <a href="/search/cs?searchtype=author&query=Tseng%2C+H">Hong-Ming Tseng</a>, <a href="/search/cs?searchtype=author&query=Song%2C+X">Xinyuan Song</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+M">Ming Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.05026v1-abstract-short" style="display: inline;"> With a focus on natural language processing (NLP) and the role of large language models (LLMs), we explore the intersection of machine learning, deep learning, and artificial intelligence. As artificial intelligence continues to revolutionize fields from healthcare to finance, NLP techniques such as tokenization, text classification, and entity recognition are essential for processing and understa… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05026v1-abstract-full').style.display = 'inline'; document.getElementById('2411.05026v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.05026v1-abstract-full" style="display: none;"> With a focus on natural language processing (NLP) and the role of large language models (LLMs), we explore the intersection of machine learning, deep learning, and artificial intelligence. As artificial intelligence continues to revolutionize fields from healthcare to finance, NLP techniques such as tokenization, text classification, and entity recognition are essential for processing and understanding human language. This paper discusses advanced data preprocessing techniques and the use of frameworks like Hugging Face for implementing transformer-based models. Additionally, it highlights challenges such as handling multilingual data, reducing bias, and ensuring model robustness. By addressing key aspects of data processing and model fine-tuning, this work aims to provide insights into deploying effective and ethically sound AI solutions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05026v1-abstract-full').style.display = 'none'; document.getElementById('2411.05026v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">255 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.02724">arXiv:2411.02724</a> <span> [<a href="https://arxiv.org/pdf/2411.02724">pdf</a>, <a href="https://arxiv.org/format/2411.02724">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> TransUNext: towards a more advanced U-shaped framework for automatic vessel segmentation in the fundus image </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiang Li</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+M">Mingsi Liu</a>, <a href="/search/cs?searchtype=author&query=Duan%2C+L">Lixin Duan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.02724v1-abstract-short" style="display: inline;"> Purpose: Automatic and accurate segmentation of fundus vessel images has become an essential prerequisite for computer-aided diagnosis of ophthalmic diseases such as diabetes mellitus. The task of high-precision retinal vessel segmentation still faces difficulties due to the low contrast between the branch ends of retinal vessels and the background, the long and thin vessel span, and the variable… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02724v1-abstract-full').style.display = 'inline'; document.getElementById('2411.02724v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.02724v1-abstract-full" style="display: none;"> Purpose: Automatic and accurate segmentation of fundus vessel images has become an essential prerequisite for computer-aided diagnosis of ophthalmic diseases such as diabetes mellitus. The task of high-precision retinal vessel segmentation still faces difficulties due to the low contrast between the branch ends of retinal vessels and the background, the long and thin vessel span, and the variable morphology of the optic disc and optic cup in fundus vessel images. Methods: We propose a more advanced U-shaped architecture for a hybrid Transformer and CNN: TransUNext, which integrates an Efficient Self-attention Mechanism into the encoder and decoder of U-Net to capture both local features and global dependencies with minimal computational overhead. Meanwhile, the Global Multi-Scale Fusion (GMSF) module is further introduced to upgrade skip-connections, fuse high-level semantic and low-level detailed information, and eliminate high- and low-level semantic differences. Inspired by ConvNeXt, TransNeXt Block is designed to optimize the computational complexity of each base block in U-Net and avoid the information loss caused by the compressed dimension when the information is converted between the feature spaces of different dimensions. Results: We evaluated the proposed method on four public datasets DRIVE, STARE, CHASE-DB1, and HRF. 
In the experimental results, the AUC (area under the ROC curve) values were 0.9867, 0.9869, 0.9910, and 0.9887, which exceeded the other state-of-the-art. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02724v1-abstract-full').style.display = 'none'; document.getElementById('2411.02724v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.01457">arXiv:2411.01457</a> <span> [<a href="https://arxiv.org/pdf/2411.01457">pdf</a>, <a href="https://arxiv.org/format/2411.01457">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Facet-Aware Multi-Head Mixture-of-Experts Model for Sequential Recommendation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+M">Mingrui Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Sixiao Zhang</a>, <a href="/search/cs?searchtype=author&query=Long%2C+C">Cheng Long</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.01457v1-abstract-short" style="display: inline;"> Sequential recommendation (SR) systems excel at capturing users' dynamic preferences by leveraging their interaction histories. Most existing SR systems assign a single embedding vector to each item to represent its features, and various types of models are adopted to combine these item embeddings into a sequence representation vector to capture the user intent. However, we argue that this represe… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.01457v1-abstract-full').style.display = 'inline'; document.getElementById('2411.01457v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.01457v1-abstract-full" style="display: none;"> Sequential recommendation (SR) systems excel at capturing users' dynamic preferences by leveraging their interaction histories. Most existing SR systems assign a single embedding vector to each item to represent its features, and various types of models are adopted to combine these item embeddings into a sequence representation vector to capture the user intent. However, we argue that this representation alone is insufficient to capture an item's multi-faceted nature (e.g., movie genres, starring actors). Besides, users often exhibit complex and varied preferences within these facets (e.g., liking both action and musical films in the facet of genre), which are challenging to fully represent. To address the issues above, we propose a novel structure called Facet-Aware Multi-Head Mixture-of-Experts Model for Sequential Recommendation (FAME). We leverage sub-embeddings from each head in the last multi-head attention layer to predict the next item separately. 
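An illustrative PyTorch sketch (not the authors' code) of what a GMSF-style skip-connection upgrade can look like: skip features from several encoder scales are resized to one resolution, concatenated, and mixed by a 1x1 convolution, so high-level semantics and low-level detail meet before decoding. Channel counts and the bilinear resize are assumptions:

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class GlobalMultiScaleFusion(nn.Module):
    """Fuse multi-scale skip features at a common spatial resolution."""
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.mix = nn.Conv2d(sum(in_channels), out_channels, kernel_size=1)

    def forward(self, feats, target_hw):
        resized = [F.interpolate(f, size=target_hw, mode="bilinear",
                                 align_corners=False) for f in feats]
        return self.mix(torch.cat(resized, dim=1))

fusion = GlobalMultiScaleFusion(in_channels=[32, 64, 128], out_channels=64)
feats = [torch.randn(1, c, s, s) for c, s in [(32, 64), (64, 32), (128, 16)]]
print(fusion(feats, target_hw=(64, 64)).shape)  # torch.Size([1, 64, 64, 64])
```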
arXiv:2411.01457 [pdf, other] cs.IR (Information Retrieval); cs.LG (Machine Learning)
Title: Facet-Aware Multi-Head Mixture-of-Experts Model for Sequential Recommendation
Authors: Mingrui Liu, Sixiao Zhang, Cheng Long
Abstract: Sequential recommendation (SR) systems excel at capturing users' dynamic preferences by leveraging their interaction histories. Most existing SR systems assign a single embedding vector to each item to represent its features, and various types of models are adopted to combine these item embeddings into a sequence representation vector to capture the user intent. However, we argue that this representation alone is insufficient to capture an item's multi-faceted nature (e.g., movie genres, starring actors). Besides, users often exhibit complex and varied preferences within these facets (e.g., liking both action and musical films in the facet of genre), which are challenging to fully represent. To address the issues above, we propose a novel structure called the Facet-Aware Multi-Head Mixture-of-Experts Model for Sequential Recommendation (FAME). We leverage sub-embeddings from each head in the last multi-head attention layer to predict the next item separately. This approach captures the potential multi-faceted nature of items without increasing model complexity. A gating mechanism integrates recommendations from each head and dynamically determines their importance. Furthermore, we introduce a Mixture-of-Experts (MoE) network in each attention head to disentangle various user preferences within each facet. Each expert within the MoE focuses on a specific preference. A learnable router network computes the importance weight for each expert and aggregates them. We conduct extensive experiments on four public sequential recommendation datasets, and the results demonstrate the effectiveness of our method over existing baseline models.
Submitted 3 November, 2024; originally announced November 2024.
Comments: This paper has been accepted by WSDM'25. The final camera-ready version will be available soon.
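A minimal NumPy sketch of the per-head prediction plus gating that the abstract describes: each head's sub-embedding scores the catalog separately, and a gate weighs the per-head score vectors. The MoE inside each head is omitted for brevity, and all shapes (and the random gate) are illustrative assumptions:

```python
import numpy as np

def softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

H, D, N = 4, 16, 100                        # heads, sub-embedding dim, items
head_states = np.random.randn(H, D)         # per-head sequence sub-embeddings
item_emb = np.random.randn(H, N, D)         # per-head item sub-embeddings

per_head_scores = np.einsum("hd,hnd->hn", head_states, item_emb)  # (H, N)
gate = softmax(np.random.randn(H))          # head importance (learned in FAME)
scores = gate @ per_head_scores             # (N,) fused next-item scores
print(int(scores.argmax()))                 # recommended item id
```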
arXiv:2411.00888 [pdf, other] eess.IV (Image and Video Processing); cs.CV (Computer Vision and Pattern Recognition); cs.LG (Machine Learning); q-bio.NC (Neurons and Cognition)
Title: Topology-Aware Graph Augmentation for Predicting Clinical Trajectories in Neurocognitive Disorders
Authors: Qianqian Wang, Wei Wang, Yuqi Fang, Hong-Jun Li, Andrea Bozoki, Mingxia Liu
Abstract: Brain networks/graphs derived from resting-state functional MRI (fMRI) help study the underlying pathophysiology of neurocognitive disorders by measuring neuronal activities in the brain. Some studies utilize learning-based methods for brain network analysis, but these typically suffer from low model generalizability caused by scarce labeled fMRI data. As a notable self-supervised strategy, graph contrastive learning helps leverage auxiliary unlabeled data. However, existing methods generally perturb graph nodes/edges arbitrarily to generate augmented graphs, without considering the essential topology of brain networks. To this end, we propose a topology-aware graph augmentation (TGA) framework, comprising a pretext model to train a generalizable encoder on large-scale unlabeled fMRI cohorts and a task-specific model to perform downstream tasks on a small target dataset. In the pretext model, we design two novel topology-aware graph augmentation strategies: (1) hub-preserving node dropping, which prioritizes preserving brain hub regions according to node importance, and (2) weight-dependent edge removing, which focuses on keeping important functional connectivities based on edge weights. Experiments on 1,688 fMRI scans suggest that TGA outperforms several state-of-the-art methods.
Submitted 31 October, 2024; originally announced November 2024.
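A hedged sketch of the two augmentation strategies on a weighted adjacency matrix; the importance proxy (node strength), the inverse-probability sampling, and the drop ratios are assumptions rather than the paper's exact recipe:

```python
import numpy as np

def hub_preserving_node_drop(adj, drop_ratio=0.1, rng=np.random):
    """Drop low-importance nodes preferentially, sparing hub regions."""
    strength = np.abs(adj).sum(axis=1)           # node importance proxy
    p = 1.0 / (strength + 1e-8)
    p /= p.sum()                                 # weak nodes: higher drop odds
    n_drop = int(adj.shape[0] * drop_ratio)
    drop = rng.choice(adj.shape[0], size=n_drop, replace=False, p=p)
    out = adj.copy()
    out[drop, :] = 0.0
    out[:, drop] = 0.0
    return out

def weight_dependent_edge_drop(adj, drop_ratio=0.1, rng=np.random):
    """Remove weak functional connections preferentially, keeping strong ones."""
    out = adj.copy()
    iu = np.triu_indices_from(out, k=1)
    w = np.abs(out[iu])
    p = 1.0 / (w + 1e-8)
    p /= p.sum()                                 # weak edges: higher drop odds
    idx = rng.choice(len(w), size=int(len(w) * drop_ratio), replace=False, p=p)
    out[iu[0][idx], iu[1][idx]] = 0.0
    out[iu[1][idx], iu[0][idx]] = 0.0
    return out

adj = np.abs(np.random.randn(90, 90)); adj = (adj + adj.T) / 2
aug = weight_dependent_edge_drop(hub_preserving_node_drop(adj))
print(aug.shape)  # (90, 90) augmented brain graph
```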
arXiv:2411.00489 [pdf, other] cs.AI (Artificial Intelligence)
Title: Human-inspired Perspectives: A Survey on AI Long-term Memory
Authors: Zihong He, Weizhe Lin, Hao Zheng, Fan Zhang, Matt Jones, Laurence Aitchison, Xuhai Xu, Miao Liu, Per Ola Kristensson, Junxiao Shen
Abstract: With the rapid advancement of AI systems, their abilities to store, retrieve, and utilize information over the long term (referred to as long-term memory) have become increasingly significant. These capabilities are crucial for enhancing the performance of AI systems across a wide range of tasks. However, there is currently no comprehensive survey that systematically investigates AI's long-term memory capabilities, formulates a theoretical framework, and inspires the development of next-generation AI long-term memory systems. This paper begins by systematically introducing the mechanisms of human long-term memory, then explores AI long-term memory mechanisms, establishing a mapping between the two. Based on the mapping relationships identified, we extend the current cognitive architectures and propose the Cognitive Architecture of Self-Adaptive Long-term Memory (SALM). SALM provides a theoretical framework for the practice of AI long-term memory and holds potential for guiding the creation of next-generation long-term memory driven AI systems. Finally, we delve into the future directions and application prospects of AI long-term memory.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.00489v1-abstract-full').style.display = 'none'; document.getElementById('2411.00489v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.23299">arXiv:2410.23299</a> <span> [<a href="https://arxiv.org/pdf/2410.23299">pdf</a>, <a href="https://arxiv.org/format/2410.23299">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> FVEval: Understanding Language Model Capabilities in Formal Verification of Digital Hardware </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kang%2C+M">Minwoo Kang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+M">Mingjie Liu</a>, <a href="/search/cs?searchtype=author&query=Hamad%2C+G+B">Ghaith Bany Hamad</a>, <a href="/search/cs?searchtype=author&query=Suhaib%2C+S">Syed Suhaib</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+H">Haoxing Ren</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.23299v1-abstract-short" style="display: inline;"> The remarkable reasoning and code generation capabilities of large language models (LLMs) have spurred significant interest in applying LLMs to enable task automation in digital chip design. In particular, recent work has investigated early ideas of applying these models to formal verification (FV), an approach to verifying hardware implementations that can provide strong guarantees of confidence… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.23299v1-abstract-full').style.display = 'inline'; document.getElementById('2410.23299v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.23299v1-abstract-full" style="display: none;"> The remarkable reasoning and code generation capabilities of large language models (LLMs) have spurred significant interest in applying LLMs to enable task automation in digital chip design. In particular, recent work has investigated early ideas of applying these models to formal verification (FV), an approach to verifying hardware implementations that can provide strong guarantees of confidence but demands significant amounts of human effort. While the value of LLM-driven automation is evident, our understanding of model performance, however, has been hindered by the lack of holistic evaluation. In response, we present FVEval, the first comprehensive benchmark and evaluation framework for characterizing LLM performance in tasks pertaining to FV. 
arXiv:2410.23131 (https://arxiv.org/abs/2410.23131) [pdf, other] cs.LG, cs.DC
Federated Learning under Periodic Client Participation and Heterogeneous Data: A New Communication-Efficient Algorithm and Analysis
Authors: Michael Crawshaw, Mingrui Liu
Abstract: In federated learning, it is common to assume that clients are always available to participate in training, which may not be feasible with user devices in practice. Recent works analyze federated learning under more realistic participation patterns, such as cyclic client availability or arbitrary participation. However, all such works either require strong assumptions (e.g., all clients participate almost surely within a bounded window), do not achieve linear speedup and reduced communication rounds, or are not applicable in the general non-convex setting. In this work, we focus on nonconvex optimization and consider participation patterns in which the chance of participation over a fixed window of rounds is equal among all clients, which includes cyclic client availability as a special case. Under this setting, we propose a new algorithm, named Amplified SCAFFOLD, and prove that it achieves linear speedup, reduced communication, and resilience to data heterogeneity simultaneously. In particular, for cyclic participation, our algorithm is proved to enjoy $\mathcal{O}(\epsilon^{-2})$ communication rounds to find an $\epsilon$-stationary point in the non-convex stochastic setting. In contrast, the prior work under the same setting requires $\mathcal{O}(\kappa^2 \epsilon^{-4})$ communication rounds, where $\kappa$ denotes the data heterogeneity. Therefore, our algorithm significantly reduces communication rounds due to better dependency in terms of $\epsilon$ and $\kappa$. Our analysis relies on a fine-grained treatment of the nested dependence between client participation and errors in the control variates, which results in tighter guarantees than previous work. We also provide experimental results with (1) synthetic data and (2) real-world data with a large number of clients $(N = 250)$, demonstrating the effectiveness of our algorithm under periodic client participation.
Submitted 11 November, 2024; v1 submitted 30 October, 2024; originally announced October 2024.
Comments: NeurIPS 2024
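For readers unfamiliar with control variates in federated learning, the sketch below shows the classic SCAFFOLD client step that this line of analysis builds on; it is not Amplified SCAFFOLD itself, and the toy quadratic objective is an assumption made only for illustration.

```python
# A generic SCAFFOLD-style client update with control variates, shown to
# illustrate the drift correction the paper's analysis refines. This is the
# classic SCAFFOLD step, not the Amplified SCAFFOLD algorithm.
import numpy as np

def client_update(x_global, c_global, c_local, grad_fn, lr=0.1, local_steps=5):
    """Run local SGD corrected by the control-variate term (c_global - c_local)."""
    y = x_global.copy()
    for _ in range(local_steps):
        y -= lr * (grad_fn(y) - c_local + c_global)
    # "Option II" control-variate update from the original SCAFFOLD paper:
    c_local_new = c_local - c_global + (x_global - y) / (lr * local_steps)
    return y, c_local_new

rng = np.random.default_rng(0)
target = rng.normal(size=3)            # client-specific optimum (heterogeneity)
grad = lambda y: y - target            # gradient of 0.5 * ||y - target||^2
x = np.zeros(3); c = np.zeros(3); ci = np.zeros(3)
y, ci = client_update(x, c, ci, grad)
print(y)
```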
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Neurips 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.23023">arXiv:2410.23023</a> <span> [<a href="https://arxiv.org/pdf/2410.23023">pdf</a>, <a href="https://arxiv.org/format/2410.23023">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> A Universal Sets-level Optimization Framework for Next Set Recommendation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yuli Liu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+M">Min Liu</a>, <a href="/search/cs?searchtype=author&query=Walder%2C+C">Christian Walder</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+L">Lexing Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.23023v1-abstract-short" style="display: inline;"> Next Set Recommendation (NSRec), encompassing related tasks such as next basket recommendation and temporal sets prediction, stands as a trending research topic. Although numerous attempts have been made on this topic, there are certain drawbacks: (i) Existing studies are still confined to utilizing objective functions commonly found in Next Item Recommendation (NIRec), such as binary cross entrop… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.23023v1-abstract-full').style.display = 'inline'; document.getElementById('2410.23023v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.23023v1-abstract-full" style="display: none;"> Next Set Recommendation (NSRec), encompassing related tasks such as next basket recommendation and temporal sets prediction, stands as a trending research topic. Although numerous attempts have been made on this topic, there are certain drawbacks: (i) Existing studies are still confined to utilizing objective functions commonly found in Next Item Recommendation (NIRec), such as binary cross entropy and BPR, which are calculated based on individual item comparisons; (ii) They place emphasis on building sophisticated learning models to capture intricate dependency relationships across sequential sets, but frequently overlook pivotal dependency in their objective functions; (iii) Diversity factor within sequential sets is frequently overlooked. In this research, we endeavor to unveil a universal and S ets-level optimization framework for N ext Set Recommendation (SNSRec), offering a holistic fusion of diversity distribution and intricate dependency relationships within temporal sets. 
arXiv:2410.22909 (https://arxiv.org/abs/2410.22909) [pdf, other] cs.CV
UniRiT: Towards Few-Shot Non-Rigid Point Cloud Registration
Authors: Geng Li, Haozhi Cao, Mingyang Liu, Chenxi Jiang, Jianfei Yang
Abstract: Non-rigid point cloud registration is a critical challenge in 3D scene understanding, particularly in surgical navigation. Although existing methods achieve excellent performance when trained on large-scale, high-quality datasets, these datasets are prohibitively expensive to collect and annotate, e.g., organ data in authentic medical scenarios. With insufficient training samples and data noise, existing methods degrade significantly, since non-rigid patterns are more flexible and complicated than rigid ones, and the distributions across samples are more distinct, making representation learning from limited data harder. In this work, we aim to deal with this challenging few-shot non-rigid point cloud registration problem. Based on the observation that complex non-rigid transformation patterns can be decomposed into rigid and small non-rigid transformations, we propose a novel and effective framework, UniRiT. UniRiT adopts a two-step registration strategy that first aligns the centroids of the source and target point clouds and then refines the registration with non-rigid transformations, thereby significantly reducing the problem complexity. To validate the performance of UniRiT on real-world datasets, we introduce a new dataset, MedMatch3D, which consists of real human organs and exhibits high variability in sample distribution. We further establish a new challenging benchmark for few-shot non-rigid registration. Extensive empirical results demonstrate that UniRiT achieves state-of-the-art performance on MedMatch3D, improving the existing best approach by 94.22%.
Submitted 30 October, 2024; originally announced October 2024.
Comments: 21 pages, 14 figures, under review
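The first stage of such a two-step strategy, centroid alignment, reduces to a translation; a minimal sketch (with synthetic point clouds, not MedMatch3D data) follows. The non-rigid refinement stage is omitted.

```python
# Minimal sketch of the first step of a two-step strategy like UniRiT's:
# translate the source cloud so its centroid matches the target's.
import numpy as np

def centroid_align(source, target):
    """Translate `source` so that its centroid coincides with `target`'s."""
    return source - source.mean(axis=0) + target.mean(axis=0)

rng = np.random.default_rng(1)
src = rng.normal(size=(100, 3))
tgt = src + np.array([5.0, -2.0, 0.5])              # translated copy
aligned = centroid_align(src, tgt)
print(np.abs(aligned.mean(0) - tgt.mean(0)).max())  # ~0: centroids coincide
```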
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">21 pages, 14 figures, under review</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.22332">arXiv:2410.22332</a> <span> [<a href="https://arxiv.org/pdf/2410.22332">pdf</a>, <a href="https://arxiv.org/format/2410.22332">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Local Policies Enable Zero-shot Long-horizon Manipulation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dalal%2C+M">Murtaza Dalal</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+M">Min Liu</a>, <a href="/search/cs?searchtype=author&query=Talbott%2C+W">Walter Talbott</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+C">Chen Chen</a>, <a href="/search/cs?searchtype=author&query=Pathak%2C+D">Deepak Pathak</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jian Zhang</a>, <a href="/search/cs?searchtype=author&query=Salakhutdinov%2C+R">Ruslan Salakhutdinov</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.22332v1-abstract-short" style="display: inline;"> Sim2real for robotic manipulation is difficult due to the challenges of simulating complex contacts and generating realistic task distributions. To tackle the latter problem, we introduce ManipGen, which leverages a new class of policies for sim2real transfer: local policies. Locality enables a variety of appealing properties including invariances to absolute robot and object pose, skill ordering,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.22332v1-abstract-full').style.display = 'inline'; document.getElementById('2410.22332v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.22332v1-abstract-full" style="display: none;"> Sim2real for robotic manipulation is difficult due to the challenges of simulating complex contacts and generating realistic task distributions. To tackle the latter problem, we introduce ManipGen, which leverages a new class of policies for sim2real transfer: local policies. Locality enables a variety of appealing properties including invariances to absolute robot and object pose, skill ordering, and global scene configuration. We combine these policies with foundation models for vision, language and motion planning and demonstrate SOTA zero-shot performance of our method to Robosuite benchmark tasks in simulation (97%). We transfer our local policies from simulation to reality and observe they can solve unseen long-horizon manipulation tasks with up to 8 stages with significant pose, object and scene configuration variation. ManipGen outperforms SOTA approaches such as SayCan, OpenVLA, LLMTrajGen and VoxPoser across 50 real-world manipulation tasks by 36%, 76%, 62% and 60% respectively. 
arXiv:2410.22240 (https://arxiv.org/abs/2410.22240) [pdf, other] cs.SE
Are Decoder-Only Large Language Models the Silver Bullet for Code Search?
Authors: Yuxuan Chen, Guangsheng Ou, Mingwei Liu, Yanlin Wang, Zibin Zheng
Abstract: Code search is crucial for code reuse, enabling developers to efficiently locate relevant snippets. Current methods rely on encoder-based models, which suffer from limitations such as poor generalization and restricted input lengths. Decoder-only large language models (LLMs), with their extensive pre-training, larger size, and longer input capabilities, offer potential solutions to these issues, yet their effectiveness in code search remains underexplored. To fill this gap, our study presents the first systematic exploration of decoder-only LLMs for code search. We evaluate nine state-of-the-art decoder-only models using two fine-tuning methods, two datasets (CSN and CoSQA$^+$), and three model sizes. Our findings reveal that fine-tuned CodeGemma significantly outperforms encoder-only models like UniXcoder, achieving a 5.57% improvement in MRR on CSN and a 49.6% increase in MAP on CoSQA$^+$ compared to zero-shot UniXcoder. These results highlight the superior performance and adaptability of decoder-only models. Additionally, we provide valuable insights into optimizing these models for code search, covering aspects such as model selection, fine-tuning methods, training data, and model size, and discussing their strengths and limitations.
Submitted 29 October, 2024; originally announced October 2024.
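Mean Reciprocal Rank (MRR), the metric behind the CSN numbers above, can be computed in a few lines; the cosine-similarity retriever here is a stand-in for the fine-tuned embedding models the paper compares.

```python
# MRR for retrieval-style code search: for each query, rank all candidate
# snippets by similarity and average 1/rank of the true snippet.
import numpy as np

def mrr(query_embs, code_embs):
    """query_embs[i] should match code_embs[i]; both L2-normalized row-wise."""
    sims = query_embs @ code_embs.T                     # (n_queries, n_codes)
    true_scores = np.take_along_axis(
        sims, np.arange(len(sims))[:, None], axis=1)    # diagonal, shape (n, 1)
    ranks = (sims >= true_scores).sum(axis=1)           # rank of the true snippet
    return float(np.mean(1.0 / ranks))

rng = np.random.default_rng(0)
codes = rng.normal(size=(50, 8))
codes /= np.linalg.norm(codes, axis=1, keepdims=True)
queries = codes + 0.1 * rng.normal(size=codes.shape)    # noisy paired queries
queries /= np.linalg.norm(queries, axis=1, keepdims=True)
print(f"MRR: {mrr(queries, codes):.3f}")
```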
arXiv:2410.22229 (https://arxiv.org/abs/2410.22229) [pdf, other] cs.NI, cs.CL
Cora: Accelerating Stateful Network Applications with SmartNICs
Authors: Shaoke Xi, Jiaqi Gao, Mengqi Liu, Jiamin Cao, Fuliang Li, Kai Bu, Kui Ren, Minlan Yu, Dennis Cai, Ennan Zhai
Abstract: With the growing performance requirements on networked applications, there is a new trend of offloading stateful network applications to SmartNICs to improve performance and reduce the total cost of ownership. However, offloading stateful network applications is non-trivial due to state operation complexity, state resource consumption, and the complicated relationship between traffic and state. Naively partitioning the program by state or traffic can result in a suboptimal partition plan with higher CPU usage or even packet drops. In this paper, we propose Cora, a compiler and runtime that offloads stateful network applications to SmartNIC-accelerated hosts. The Cora compiler introduces an accurate performance model for each SmartNIC and employs an efficient compilation algorithm to search for the offloading plan. The Cora runtime can monitor traffic dynamics and adapt to minimize CPU usage. Cora is built atop Netronome Agilio and BlueField 2 SmartNICs. Our evaluation shows that for the same throughput target, Cora can propose partition plans saving up to 94.0% CPU cores, 1.9 times more than baseline solutions. Under the same resource constraint, Cora can accelerate network functions by 44.9%-82.3%. The Cora runtime can adapt to traffic changes and keep CPU usage low.
Submitted 29 October, 2024; originally announced October 2024.
arXiv:2410.21764 (https://arxiv.org/abs/2410.21764) [pdf, other] cs.LG, cs.AI
Online Mirror Descent for Tchebycheff Scalarization in Multi-Objective Optimization
Authors: Meitong Liu, Xiaoyuan Zhang, Chulin Xie, Kate Donahue, Han Zhao
Abstract: The goal of multi-objective optimization (MOO) is to learn under multiple, potentially conflicting, objectives. One widely used technique to tackle MOO is through linear scalarization, where one fixed preference vector is used to combine the objectives into a single scalar value for optimization. However, recent work (Hu et al., 2024) has shown linear scalarization often fails to capture the non-convex regions of the Pareto Front, failing to recover the complete set of Pareto optimal solutions. In light of the above limitations, this paper focuses on Tchebycheff scalarization that optimizes for the worst-case objective. In particular, we propose an online mirror descent algorithm for Tchebycheff scalarization, which we call OMD-TCH. We show that OMD-TCH enjoys a convergence rate of $O(\sqrt{\log m/T})$ where $m$ is the number of objectives and $T$ is the number of iteration rounds. We also propose a novel adaptive online-to-batch conversion scheme that significantly improves the practical performance of OMD-TCH while maintaining the same convergence guarantees. We demonstrate the effectiveness of OMD-TCH and the adaptive conversion scheme on both synthetic problems and federated learning tasks under fairness constraints, showing state-of-the-art performance.
Submitted 11 November, 2024; v1 submitted 29 October, 2024; originally announced October 2024.
Comments: 26 pages, 7 figures, 2 tables
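A schematic rendering of mirror descent on the simplex for a Tchebycheff-style worst-case objective follows. Step sizes, the stochastic setting, and the paper's adaptive online-to-batch conversion are all omitted, so this conveys only the shape of the algorithm, not OMD-TCH as specified.

```python
# Schematic sketch: weights `lam` on the simplex are updated multiplicatively
# (exponentiated gradient, the entropic mirror map) toward the worst
# objective, while x descends the lam-weighted gradient.
import numpy as np

fs = [lambda x: (x - 1.0) ** 2, lambda x: (x + 1.0) ** 2]   # two objectives
grads = [lambda x: 2 * (x - 1.0), lambda x: 2 * (x + 1.0)]

x, lam = 0.5, np.ones(2) / 2
eta_x, eta_lam = 0.05, 0.5
for t in range(200):
    losses = np.array([f(x) for f in fs])
    lam = lam * np.exp(eta_lam * losses)   # mirror (multiplicative) ascent
    lam /= lam.sum()                       # normalize back onto the simplex
    x -= eta_x * sum(l * g(x) for l, g in zip(lam, grads))
print(x, lam)  # x near 0, the minimizer of max((x-1)^2, (x+1)^2)
```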
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">26 pages, 7 figures, 2 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.21257">arXiv:2410.21257</a> <span> [<a href="https://arxiv.org/pdf/2410.21257">pdf</a>, <a href="https://arxiv.org/format/2410.21257">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> One-Step Diffusion Policy: Fast Visuomotor Policies via Diffusion Distillation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zhendong Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhaoshuo Li</a>, <a href="/search/cs?searchtype=author&query=Mandlekar%2C+A">Ajay Mandlekar</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Z">Zhenjia Xu</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+J">Jiaojiao Fan</a>, <a href="/search/cs?searchtype=author&query=Narang%2C+Y">Yashraj Narang</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+L">Linxi Fan</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Y">Yuke Zhu</a>, <a href="/search/cs?searchtype=author&query=Balaji%2C+Y">Yogesh Balaji</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+M">Mingyuan Zhou</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+M">Ming-Yu Liu</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+Y">Yu Zeng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.21257v1-abstract-short" style="display: inline;"> Diffusion models, praised for their success in generative tasks, are increasingly being applied to robotics, demonstrating exceptional performance in behavior cloning. However, their slow generation process stemming from iterative denoising steps poses a challenge for real-time applications in resource-constrained robotics setups and dynamically changing environments. In this paper, we introduce t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.21257v1-abstract-full').style.display = 'inline'; document.getElementById('2410.21257v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.21257v1-abstract-full" style="display: none;"> Diffusion models, praised for their success in generative tasks, are increasingly being applied to robotics, demonstrating exceptional performance in behavior cloning. However, their slow generation process stemming from iterative denoising steps poses a challenge for real-time applications in resource-constrained robotics setups and dynamically changing environments. In this paper, we introduce the One-Step Diffusion Policy (OneDP), a novel approach that distills knowledge from pre-trained diffusion policies into a single-step action generator, significantly accelerating response times for robotic control tasks. 
arXiv:2410.21048 (https://arxiv.org/abs/2410.21048) [pdf, other] cs.IR
Pay Attention to Attention for Sequential Recommendation
Authors: Yuli Liu, Min Liu, Xiaojing Liu
Abstract: Transformer-based approaches have demonstrated remarkable success in various sequence-based tasks. However, traditional self-attention models may not sufficiently capture the intricate dependencies within items in sequential recommendation scenarios. This is due to the lack of explicit emphasis on attention weights, which play a critical role in allocating attention and understanding item-to-item correlations. To better exploit the potential of attention weights and improve the capability of sequential recommendation in learning high-order dependencies, we propose a novel sequential recommendation (SR) approach called attention weight refinement (AWRSR). AWRSR enhances the effectiveness of self-attention by additionally paying attention to attention weights, allowing for more refined attention distributions of correlations among items. We conduct comprehensive experiments on multiple real-world datasets, demonstrating that our approach consistently outperforms state-of-the-art SR models. Moreover, we provide a thorough analysis of AWRSR's effectiveness in capturing higher-level dependencies. These findings suggest that AWRSR offers a promising new direction for enhancing the performance of self-attention architecture in SR tasks, with potential applications in other sequence-based problems as well.
Submitted 28 October, 2024; originally announced October 2024.
Comments: Accepted at RecSys 2024
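One plausible (hypothetical) reading of "paying attention to attention" is to run a second softmax-attention over the first attention-weight matrix; the sketch below illustrates only that idea, and the actual AWRSR refinement may differ.

```python
# Hypothetical illustration: treat attention weights as first-class objects
# and refine them with a second attention pass. Not the paper's exact model.
import numpy as np

def softmax(z, axis=-1):
    z = z - z.max(axis=axis, keepdims=True)
    e = np.exp(z)
    return e / e.sum(axis=axis, keepdims=True)

rng = np.random.default_rng(0)
n, d = 5, 8                              # sequence length, head dimension
Q, K, V = (rng.normal(size=(n, d)) for _ in range(3))

A = softmax(Q @ K.T / np.sqrt(d))        # standard attention weights (n, n)
W = softmax(A @ A.T / np.sqrt(n))        # second-level attention over A's rows
A_refined = W @ A                        # refined attention distribution
out = A_refined @ V
print(out.shape, np.allclose(A_refined.sum(1), 1.0))  # rows still sum to 1
```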
arXiv:2410.20812 (https://arxiv.org/abs/2410.20812) [pdf, other] cs.CV, cs.LG, eess.IV
Fidelity-Imposed Displacement Editing for the Learn2Reg 2024 SHG-BF Challenge
Authors: Jiacheng Wang, Xiang Chen, Renjiu Hu, Rongguang Wang, Min Liu, Yaonan Wang, Jiazheng Wang, Hao Li, Hang Zhang
Abstract: Co-examination of second-harmonic generation (SHG) and bright-field (BF) microscopy enables the differentiation of tissue components and collagen fibers, aiding the analysis of human breast and pancreatic cancer tissues. However, large discrepancies between SHG and BF images pose challenges for current learning-based registration models in aligning SHG to BF. In this paper, we propose a novel multi-modal registration framework that employs fidelity-imposed displacement editing to address these challenges. The framework integrates batch-wise contrastive learning, feature-based pre-alignment, and instance-level optimization. Experimental results from the Learn2Reg COMULISglobe SHG-BF Challenge validate the effectiveness of our method, securing the 1st place on the online leaderboard.
Submitted 28 October, 2024; originally announced October 2024.
arXiv:2410.20424 (https://arxiv.org/abs/2410.20424) [pdf, other] cs.AI, cs.CL
AutoKaggle: A Multi-Agent Framework for Autonomous Data Science Competitions
Authors: Ziming Li, Qianbo Zang, David Ma, Jiawei Guo, Tuney Zheng, Minghao Liu, Xinyao Niu, Yue Wang, Jian Yang, Jiaheng Liu, Wanjun Zhong, Wangchunshu Zhou, Wenhao Huang, Ge Zhang
Abstract: Data science tasks involving tabular data present complex challenges that require sophisticated problem-solving approaches. We propose AutoKaggle, a powerful and user-centric framework that assists data scientists in completing daily data pipelines through a collaborative multi-agent system. AutoKaggle implements an iterative development process that combines code execution, debugging, and comprehensive unit testing to ensure code correctness and logic consistency. The framework offers highly customizable workflows, allowing users to intervene at each phase, thus integrating automated intelligence with human expertise. Our universal data science toolkit, comprising validated functions for data cleaning, feature engineering, and modeling, forms the foundation of this solution, enhancing productivity by streamlining common tasks. We selected 8 Kaggle competitions to simulate data processing workflows in real-world application scenarios. Evaluation results demonstrate that AutoKaggle achieves a validation submission rate of 0.85 and a comprehensive score of 0.82 in typical data science pipelines, fully proving its effectiveness and practicality in handling complex data science tasks.
Submitted 5 November, 2024; v1 submitted 27 October, 2024; originally announced October 2024.
Comments: 44 pages, 10 figures
arXiv:2410.20304 (https://arxiv.org/abs/2410.20304) [pdf, ps, other] cs.CV, cs.GR, eess.IV, eess.SP
Deep Learning, Machine Learning -- Digital Signal and Image Processing: From Theory to Application
Authors: Weiche Hsieh, Ziqian Bi, Junyu Liu, Benji Peng, Sen Zhang, Xuanhe Pan, Jiawei Xu, Jinlang Wang, Keyu Chen, Caitlyn Heqi Yin, Pohsun Feng, Yizhu Wen, Tianyang Wang, Ming Li, Jintao Ren, Qian Niu, Silin Chen, Ming Liu
Abstract: Digital Signal Processing (DSP) and Digital Image Processing (DIP) with Machine Learning (ML) and Deep Learning (DL) are popular research areas in Computer Vision and related fields. We highlight transformative applications in image enhancement, filtering techniques, and pattern recognition. By integrating frameworks like the Discrete Fourier Transform (DFT), Z-Transform, and Fourier Transform methods, we enable robust data manipulation and feature extraction essential for AI-driven tasks. Using Python, we implement algorithms that optimize real-time data processing, forming a foundation for scalable, high-performance solutions in computer vision. This work illustrates the potential of ML and DL to advance DSP and DIP methodologies, contributing to artificial intelligence, automated feature extraction, and applications across diverse domains.
Submitted 26 October, 2024; originally announced October 2024.
Comments: 293 pages
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">293 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.20142">arXiv:2410.20142</a> <span> [<a href="https://arxiv.org/pdf/2410.20142">pdf</a>, <a href="https://arxiv.org/format/2410.20142">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Mask-based Membership Inference Attacks for Retrieval-Augmented Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+M">Mingrui Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Sixiao Zhang</a>, <a href="/search/cs?searchtype=author&query=Long%2C+C">Cheng Long</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.20142v1-abstract-short" style="display: inline;"> Retrieval-Augmented Generation (RAG) has been an effective approach to mitigate hallucinations in large language models (LLMs) by incorporating up-to-date and domain-specific knowledge. Recently, there has been a trend of storing up-to-date or copyrighted data in RAG knowledge databases instead of using it for LLM training. This practice has raised concerns about Membership Inference Attacks (MIAs… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20142v1-abstract-full').style.display = 'inline'; document.getElementById('2410.20142v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.20142v1-abstract-full" style="display: none;"> Retrieval-Augmented Generation (RAG) has been an effective approach to mitigate hallucinations in large language models (LLMs) by incorporating up-to-date and domain-specific knowledge. Recently, there has been a trend of storing up-to-date or copyrighted data in RAG knowledge databases instead of using it for LLM training. This practice has raised concerns about Membership Inference Attacks (MIAs), which aim to detect if a specific target document is stored in the RAG system's knowledge database so as to protect the rights of data producers. While research has focused on enhancing the trustworthiness of RAG systems, existing MIAs for RAG systems remain largely insufficient. Previous work either relies solely on the RAG system's judgment or is easily influenced by other documents or the LLM's internal knowledge, which is unreliable and lacks explainability. To address these limitations, we propose a Mask-Based Membership Inference Attacks (MBA) framework. Our framework first employs a masking algorithm that effectively masks a certain number of words in the target document. The masked text is then used to prompt the RAG system, and the RAG system is required to predict the mask values. 
arXiv:2410.19849 (https://arxiv.org/abs/2410.19849) [pdf, ps, other] cs.LG, cs.DS, cs.PL
Deep Learning and Machine Learning -- Python Data Structures and Mathematics Fundamental: From Theory to Practice
Authors: Silin Chen, Ziqian Bi, Junyu Liu, Benji Peng, Sen Zhang, Xuanhe Pan, Jiawei Xu, Jinlang Wang, Keyu Chen, Caitlyn Heqi Yin, Pohsun Feng, Yizhu Wen, Tianyang Wang, Ming Li, Jintao Ren, Qian Niu, Ming Liu
Abstract: This book provides a comprehensive introduction to the foundational concepts of machine learning (ML) and deep learning (DL). It bridges the gap between theoretical mathematics and practical application, focusing on Python as the primary programming language for implementing key algorithms and data structures. The book covers a wide range of topics, including basic and advanced Python programming, fundamental mathematical operations, matrix operations, linear algebra, and optimization techniques crucial for training ML and DL models. Advanced subjects like neural networks, optimization algorithms, and frequency domain methods are also explored, along with real-world applications of large language models (LLMs) and artificial intelligence (AI) in big data management. Designed for both beginners and advanced learners, the book emphasizes the critical role of mathematical principles in developing scalable AI solutions. Practical examples and Python code are provided throughout, ensuring readers gain hands-on experience in applying theoretical knowledge to solve complex problems in ML, DL, and big data analytics.
Submitted 22 October, 2024; originally announced October 2024.
Comments: 298 pages
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">298 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.19847">arXiv:2410.19847</a> <span> [<a href="https://arxiv.org/pdf/2410.19847">pdf</a>, <a href="https://arxiv.org/format/2410.19847">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> AEPL: Automated and Editable Prompt Learning for Brain Tumor Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sun%2C+Y">Yongheng Sun</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+M">Mingxia Liu</a>, <a href="/search/cs?searchtype=author&query=Lian%2C+C">Chunfeng Lian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.19847v1-abstract-short" style="display: inline;"> Brain tumor segmentation is crucial for accurate diagnosisand treatment planning, but the small size and irregular shapeof tumors pose significant challenges. Existing methods of-ten fail to effectively incorporate medical domain knowledgesuch as tumor grade, which correlates with tumor aggres-siveness and morphology, providing critical insights for moreaccurate detection of tumor subregions durin… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19847v1-abstract-full').style.display = 'inline'; document.getElementById('2410.19847v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.19847v1-abstract-full" style="display: none;"> Brain tumor segmentation is crucial for accurate diagnosisand treatment planning, but the small size and irregular shapeof tumors pose significant challenges. Existing methods of-ten fail to effectively incorporate medical domain knowledgesuch as tumor grade, which correlates with tumor aggres-siveness and morphology, providing critical insights for moreaccurate detection of tumor subregions during segmentation.We propose an Automated and Editable Prompt Learning(AEPL) framework that integrates tumor grade into the seg-mentation process by combining multi-task learning andprompt learning with automatic and editable prompt gen-eration. Specifically, AEPL employs an encoder to extractimage features for both tumor-grade prediction and segmen-tation mask generation. The predicted tumor grades serveas auto-generated prompts, guiding the decoder to produceprecise segmentation masks. This eliminates the need formanual prompts while allowing clinicians to manually editthe auto-generated prompts to fine-tune the segmentation,enhancing both flexibility and precision. The proposed AEPLachieves state-of-the-art performance on the BraTS 2018dataset, demonstrating its effectiveness and clinical potential.The source code can be accessed online. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.19847v1-abstract-full').style.display = 'none'; document.getElementById('2410.19847v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">4 pages paper for ISBI2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.18135">arXiv:2410.18135</a> <span> [<a href="https://arxiv.org/pdf/2410.18135">pdf</a>, <a href="https://arxiv.org/format/2410.18135">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> R2Gen-Mamba: A Selective State Space Model for Radiology Report Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sun%2C+Y">Yongheng Sun</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+Y+Z">Yueh Z. Lee</a>, <a href="/search/cs?searchtype=author&query=Woodard%2C+G+A">Genevieve A. Woodard</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+H">Hongtu Zhu</a>, <a href="/search/cs?searchtype=author&query=Lian%2C+C">Chunfeng Lian</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+M">Mingxia Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.18135v1-abstract-short" style="display: inline;"> Radiology report generation is crucial in medical imaging,but the manual annotation process by physicians is time-consuming and labor-intensive, necessitating the develop-ment of automatic report generation methods. Existingresearch predominantly utilizes Transformers to generateradiology reports, which can be computationally intensive,limiting their use in real applications. In this work, we pres… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18135v1-abstract-full').style.display = 'inline'; document.getElementById('2410.18135v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.18135v1-abstract-full" style="display: none;"> Radiology report generation is crucial in medical imaging,but the manual annotation process by physicians is time-consuming and labor-intensive, necessitating the develop-ment of automatic report generation methods. Existingresearch predominantly utilizes Transformers to generateradiology reports, which can be computationally intensive,limiting their use in real applications. 
In this work, we present R2Gen-Mamba, a novel automatic radiology report generation method that leverages the efficient sequence processing of Mamba with the contextual benefits of Transformer architectures. Due to the lower computational complexity of Mamba, R2Gen-Mamba not only enhances training and inference efficiency but also produces high-quality reports. Experimental results on two benchmark datasets with more than 210,000 X-ray image-report pairs demonstrate the effectiveness of R2Gen-Mamba regarding report quality and computational efficiency compared with several state-of-the-art methods. The source code can be accessed online. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18135v1-abstract-full').style.display = 'none'; document.getElementById('2410.18135v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">4-page paper for ISBI2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.16506">arXiv:2410.16506</a> <span> [<a href="https://arxiv.org/pdf/2410.16506">pdf</a>, <a href="https://arxiv.org/format/2410.16506">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Functional Analysis">math.FA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Numerical Analysis">math.NA</span> </div> </div> <p class="title is-5 mathjax"> ReLU neural network approximation to piecewise constant functions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cai%2C+Z">Zhiqiang Cai</a>, <a href="/search/cs?searchtype=author&query=Choi%2C+J">Junpyo Choi</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+M">Min Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.16506v1-abstract-short" style="display: inline;"> This paper studies the approximation property of ReLU neural networks (NNs) to piecewise constant functions with unknown interfaces in bounded regions in $\mathbb{R}^d$. Under the assumption that the discontinuity interface $\Gamma$ may be approximated by a connected series of hyperplanes with a prescribed accuracy $\varepsilon >0$, we show that a three-layer ReLU NN is sufficient to accurately approxi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16506v1-abstract-full').style.display = 'inline'; document.getElementById('2410.16506v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.16506v1-abstract-full" style="display: none;"> This paper studies the approximation property of ReLU neural networks (NNs) to piecewise constant functions with unknown interfaces in bounded regions in $\mathbb{R}^d$.
Under the assumption that the discontinuity interface $\Gamma$ may be approximated by a connected series of hyperplanes with a prescribed accuracy $\varepsilon >0$, we show that a three-layer ReLU NN is sufficient to accurately approximate any piecewise constant function and establish its error bound. Moreover, if the discontinuity interface is convex, an analytical formula of the ReLU NN approximation with exact weights and biases is provided. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16506v1-abstract-full').style.display = 'none'; document.getElementById('2410.16506v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 8 figures, submitted to the journal</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 68T07; 41A25; 41A46 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.15584">arXiv:2410.15584</a> <span> [<a href="https://arxiv.org/pdf/2410.15584">pdf</a>, <a href="https://arxiv.org/ps/2410.15584">ps</a>, <a href="https://arxiv.org/format/2410.15584">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> </div> </div> <p class="title is-5 mathjax"> Deep Learning and Machine Learning -- Object Detection and Semantic Segmentation: From Theory to Applications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ren%2C+J">Jintao Ren</a>, <a href="/search/cs?searchtype=author&query=Bi%2C+Z">Ziqian Bi</a>, <a href="/search/cs?searchtype=author&query=Niu%2C+Q">Qian Niu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Junyu Liu</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+B">Benji Peng</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Sen Zhang</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+X">Xuanhe Pan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jinlang Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Keyu Chen</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+C+H">Caitlyn Heqi Yin</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+P">Pohsun Feng</a>, <a href="/search/cs?searchtype=author&query=Wen%2C+Y">Yizhu Wen</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tianyang Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+S">Silin Chen</a>, <a href="/search/cs?searchtype=author&query=Li%2C+M">Ming Li</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+J">Jiawei Xu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+M">Ming Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.15584v1-abstract-short" style="display: inline;"> This book offers an in-depth exploration of object detection and semantic
segmentation, combining theoretical foundations with practical applications. It covers state-of-the-art advancements in machine learning and deep learning, with a focus on convolutional neural networks (CNNs), YOLO architectures, and transformer-based approaches like DETR. The book also delves into the integration of artific… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15584v1-abstract-full').style.display = 'inline'; document.getElementById('2410.15584v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.15584v1-abstract-full" style="display: none;"> This book offers an in-depth exploration of object detection and semantic segmentation, combining theoretical foundations with practical applications. It covers state-of-the-art advancements in machine learning and deep learning, with a focus on convolutional neural networks (CNNs), YOLO architectures, and transformer-based approaches like DETR. The book also delves into the integration of artificial intelligence (AI) techniques and large language models for enhanced object detection in complex environments. A thorough discussion of big data analysis is presented, highlighting the importance of data processing, model optimization, and performance evaluation metrics. By bridging the gap between traditional methods and modern deep learning frameworks, this book serves as a comprehensive guide for researchers, data scientists, and engineers aiming to leverage AI-driven methodologies in large-scale object detection tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15584v1-abstract-full').style.display = 'none'; document.getElementById('2410.15584v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">167 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.15236">arXiv:2410.15236</a> <span> [<a href="https://arxiv.org/pdf/2410.15236">pdf</a>, <a href="https://arxiv.org/ps/2410.15236">ps</a>, <a href="https://arxiv.org/format/2410.15236">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Jailbreaking and Mitigation of Vulnerabilities in Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Peng%2C+B">Benji Peng</a>, <a href="/search/cs?searchtype=author&query=Bi%2C+Z">Ziqian Bi</a>, <a href="/search/cs?searchtype=author&query=Niu%2C+Q">Qian Niu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+M">Ming Liu</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+P">Pohsun Feng</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tianyang Wang</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+L+K+Q">Lawrence K. Q. Yan</a>, <a href="/search/cs?searchtype=author&query=Wen%2C+Y">Yizhu Wen</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yichao Zhang</a>, <a href="/search/cs?searchtype=author&query=Yin%2C+C+H">Caitlyn Heqi Yin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.15236v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) have transformed artificial intelligence by advancing natural language understanding and generation, enabling applications across fields beyond healthcare, software engineering, and conversational systems. Despite these advancements in the past few years, LLMs have shown considerable vulnerabilities, particularly to prompt injection and jailbreaking attacks. This revie… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15236v1-abstract-full').style.display = 'inline'; document.getElementById('2410.15236v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.15236v1-abstract-full" style="display: none;"> Large Language Models (LLMs) have transformed artificial intelligence by advancing natural language understanding and generation, enabling applications across fields beyond healthcare, software engineering, and conversational systems. Despite these advancements in the past few years, LLMs have shown considerable vulnerabilities, particularly to prompt injection and jailbreaking attacks. This review analyzes the state of research on these vulnerabilities and presents available defense strategies. We roughly categorize attack approaches into prompt-based, model-based, multimodal, and multilingual, covering techniques such as adversarial prompting, backdoor injections, and cross-modality exploits. 
We also review various defense mechanisms, including prompt filtering, transformation, alignment techniques, multi-agent defenses, and self-regulation, evaluating their strengths and shortcomings. We also discuss key metrics and benchmarks used to assess LLM safety and robustness, noting challenges like the quantification of attack success in interactive contexts and biases in existing datasets. Identifying current research gaps, we suggest future directions for resilient alignment strategies, advanced defenses against evolving attacks, automation of jailbreak detection, and consideration of ethical and societal impacts. This review emphasizes the need for continued research and cooperation within the AI community to enhance LLM security and ensure their safe deployment. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15236v1-abstract-full').style.display = 'none'; document.getElementById('2410.15236v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.14145">arXiv:2410.14145</a> <span> [<a href="https://arxiv.org/pdf/2410.14145">pdf</a>, <a href="https://arxiv.org/format/2410.14145">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> CAPE: A Chinese Dataset for Appraisal-based Emotional Generation using Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+J+M">June M. Liu</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+H">He Cao</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+R">Renliang Sun</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+R">Rui Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yu Li</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jiaxing Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.14145v1-abstract-short" style="display: inline;"> Generating emotionally appropriate responses in conversations with large language models presents a significant challenge due to the complexities of human emotions and cognitive processes, which remain largely underexplored in their critical role in social interactions. In this study, we introduce a two-stage automatic data generation framework to create CAPE, a Chinese dataset named Cognitive App… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14145v1-abstract-full').style.display = 'inline'; document.getElementById('2410.14145v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.14145v1-abstract-full" style="display: none;"> Generating emotionally appropriate responses in conversations with large language models presents a significant challenge due to the complexities of human emotions and cognitive processes, which remain largely underexplored in their critical role in social interactions. 
In this study, we introduce a two-stage automatic data generation framework to create CAPE, a Chinese dataset named Cognitive Appraisal theory-based Emotional corpus. This corpus facilitates the generation of dialogues with contextually appropriate emotional responses by accounting for diverse personal and situational factors. We propose two tasks utilizing this dataset: emotion prediction and next utterance prediction. Both automated and human evaluations demonstrate that agents trained on our dataset can deliver responses that are more aligned with human emotional expressions. Our study shows the potential for advancing emotional expression in conversational agents, paving the way for more nuanced and meaningful human-computer interactions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14145v1-abstract-full').style.display = 'none'; document.getElementById('2410.14145v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.14045">arXiv:2410.14045</a> <span> [<a href="https://arxiv.org/pdf/2410.14045">pdf</a>, <a href="https://arxiv.org/format/2410.14045">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Human Action Anticipation: A Survey </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lai%2C+B">Bolin Lai</a>, <a href="/search/cs?searchtype=author&query=Toyer%2C+S">Sam Toyer</a>, <a href="/search/cs?searchtype=author&query=Nagarajan%2C+T">Tushar Nagarajan</a>, <a href="/search/cs?searchtype=author&query=Girdhar%2C+R">Rohit Girdhar</a>, <a href="/search/cs?searchtype=author&query=Zha%2C+S">Shengxin Zha</a>, <a href="/search/cs?searchtype=author&query=Rehg%2C+J+M">James M. Rehg</a>, <a href="/search/cs?searchtype=author&query=Kitani%2C+K">Kris Kitani</a>, <a href="/search/cs?searchtype=author&query=Grauman%2C+K">Kristen Grauman</a>, <a href="/search/cs?searchtype=author&query=Desai%2C+R">Ruta Desai</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+M">Miao Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.14045v1-abstract-short" style="display: inline;"> Predicting future human behavior is an increasingly popular topic in computer vision, driven by the interest in applications such as autonomous vehicles, digital assistants and human-robot interactions. The literature on behavior prediction spans various tasks, including action anticipation, activity forecasting, intent prediction, goal prediction, and so on. 
Our survey aims to tie together this f… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14045v1-abstract-full').style.display = 'inline'; document.getElementById('2410.14045v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.14045v1-abstract-full" style="display: none;"> Predicting future human behavior is an increasingly popular topic in computer vision, driven by the interest in applications such as autonomous vehicles, digital assistants and human-robot interactions. The literature on behavior prediction spans various tasks, including action anticipation, activity forecasting, intent prediction, goal prediction, and so on. Our survey aims to tie together this fragmented literature, covering recent technical innovations as well as the development of new large-scale datasets for model training and evaluation. We also summarize the widely-used metrics for different tasks and provide a comprehensive performance comparison of existing approaches on eleven action anticipation datasets. This survey serves as not only a reference for contemporary methodologies in action anticipation, but also a guideline for future research direction of this evolving landscape. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.14045v1-abstract-full').style.display = 'none'; document.getElementById('2410.14045v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">30 pages, 9 figures, 12 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.13639">arXiv:2410.13639</a> <span> [<a href="https://arxiv.org/pdf/2410.13639">pdf</a>, <a href="https://arxiv.org/format/2410.13639">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> A Comparative Study on Reasoning Patterns of OpenAI's o1 Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wu%2C+S">Siwei Wu</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+Z">Zhongyuan Peng</a>, <a href="/search/cs?searchtype=author&query=Du%2C+X">Xinrun Du</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+T">Tuney Zheng</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+M">Minghao Liu</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+J">Jialong Wu</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+J">Jiachen Ma</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yizhi Li</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+J">Jian Yang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+W">Wangchunshu Zhou</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+Q">Qunshu Lin</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+J">Junbo Zhao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhaoxiang Zhang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+W">Wenhao Huang</a>, 
<a href="/search/cs?searchtype=author&query=Zhang%2C+G">Ge Zhang</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+C">Chenghua Lin</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J+H">J. H. Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.13639v2-abstract-short" style="display: inline;"> Enabling Large Language Models (LLMs) to handle a wider range of complex tasks (e.g., coding, math) has drawn great attention from many researchers. As LLMs continue to evolve, merely increasing the number of model parameters yields diminishing performance improvements and heavy computational costs. Recently, OpenAI's o1 model has shown that inference strategies (i.e., Test-time Compute methods) c… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.13639v2-abstract-full').style.display = 'inline'; document.getElementById('2410.13639v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.13639v2-abstract-full" style="display: none;"> Enabling Large Language Models (LLMs) to handle a wider range of complex tasks (e.g., coding, math) has drawn great attention from many researchers. As LLMs continue to evolve, merely increasing the number of model parameters yields diminishing performance improvements and heavy computational costs. Recently, OpenAI's o1 model has shown that inference strategies (i.e., Test-time Compute methods) can also significantly enhance the reasoning capabilities of LLMs. However, the mechanisms behind these methods are still unexplored. In our work, to investigate the reasoning patterns of o1, we compare o1 with existing Test-time Compute methods (BoN, Step-wise BoN, Agent Workflow, and Self-Refine) by using OpenAI's GPT-4o as a backbone on general reasoning benchmarks in three domains (i.e., math, coding, commonsense reasoning). Specifically, first, our experiments show that the o1 model has achieved the best performance on most datasets. Second, as for the methods of searching diverse responses (e.g., BoN), we find the reward models' capability and the search space both limit the upper boundary of these methods. Third, as for the methods that break the problem into many sub-problems, the Agent Workflow has achieved better performance than Step-wise BoN due to the domain-specific system prompt for planning better reasoning processes. Fourth, it is worth mentioning that we have summarized six reasoning patterns of o1, and provided a detailed analysis on several reasoning benchmarks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.13639v2-abstract-full').style.display = 'none'; document.getElementById('2410.13639v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.11560">arXiv:2410.11560</a> <span> [<a href="https://arxiv.org/pdf/2410.11560">pdf</a>, <a href="https://arxiv.org/format/2410.11560">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> PSVMA+: Exploring Multi-granularity Semantic-visual Adaption for Generalized Zero-shot Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+M">Man Liu</a>, <a href="/search/cs?searchtype=author&query=Bai%2C+H">Huihui Bai</a>, <a href="/search/cs?searchtype=author&query=Li%2C+F">Feng Li</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+C">Chunjie Zhang</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+Y">Yunchao Wei</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+M">Meng Wang</a>, <a href="/search/cs?searchtype=author&query=Chua%2C+T">Tat-Seng Chua</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Y">Yao Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.11560v1-abstract-short" style="display: inline;"> Generalized zero-shot learning (GZSL) endeavors to identify the unseen categories using knowledge from the seen domain, necessitating the intrinsic interactions between the visual features and attribute semantic features. However, GZSL suffers from insufficient visual-semantic correspondences due to the attribute diversity and instance diversity. Attribute diversity refers to varying semantic gran… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.11560v1-abstract-full').style.display = 'inline'; document.getElementById('2410.11560v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.11560v1-abstract-full" style="display: none;"> Generalized zero-shot learning (GZSL) endeavors to identify the unseen categories using knowledge from the seen domain, necessitating the intrinsic interactions between the visual features and attribute semantic features. However, GZSL suffers from insufficient visual-semantic correspondences due to the attribute diversity and instance diversity. Attribute diversity refers to varying semantic granularity in attribute descriptions, ranging from low-level (specific, directly observable) to high-level (abstract, highly generic) characteristics. This diversity challenges the collection of adequate visual cues for attributes under a uni-granularity. Additionally, diverse visual instances corresponding to the same sharing attributes introduce semantic ambiguity, leading to vague visual patterns. To tackle these problems, we propose a multi-granularity progressive semantic-visual mutual adaption (PSVMA+) network, where sufficient visual elements across granularity levels can be gathered to remedy the granularity inconsistency. PSVMA+ explores semantic-visual interactions at different granularity levels, enabling awareness of multi-granularity in both visual and semantic elements. 
At each granularity level, the dual semantic-visual transformer module (DSVTM) recasts the sharing attributes into instance-centric attributes and aggregates the semantic-related visual regions, thereby learning unambiguous visual features to accommodate various instances. Given the diverse contributions of different granularities, PSVMA+ employs selective cross-granularity learning to leverage knowledge from reliable granularities and adaptively fuses multi-granularity features for comprehensive representations. Experimental results demonstrate that PSVMA+ consistently outperforms state-of-the-art methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.11560v1-abstract-full').style.display = 'none'; document.getElementById('2410.11560v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to TPAMI 2024. arXiv admin note: text overlap with arXiv:2303.15322</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.10872">arXiv:2410.10872</a> <span> [<a href="https://arxiv.org/pdf/2410.10872">pdf</a>, <a href="https://arxiv.org/format/2410.10872">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> ToolBridge: An Open-Source Dataset to Equip LLMs with External Tool Capabilities </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jin%2C+Z">Zhenchao Jin</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+M">Mengchen Liu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+D">Dongdong Chen</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+L">Lingting Zhu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yunsheng Li</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+L">Lequan Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.10872v1-abstract-short" style="display: inline;"> Through the integration of external tools, large language models (LLMs) such as GPT-4o and Llama 3.1 significantly expand their functional capabilities, evolving from elementary conversational agents to general-purpose assistants. We argue that the primary drivers of these advancements are the quality and diversity of the training data. 
However, the existing LLMs with external tool integration pro… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10872v1-abstract-full').style.display = 'inline'; document.getElementById('2410.10872v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.10872v1-abstract-full" style="display: none;"> Through the integration of external tools, large language models (LLMs) such as GPT-4o and Llama 3.1 significantly expand their functional capabilities, evolving from elementary conversational agents to general-purpose assistants. We argue that the primary drivers of these advancements are the quality and diversity of the training data. However, the existing LLMs with external tool integration provide only limited transparency regarding their datasets and data collection methods, which has led to the initiation of this research. Specifically, in this paper, our objective is to elucidate the detailed process involved in constructing datasets that empower LLMs to effectively learn how to utilize external tools and make this information available to the public through the introduction of ToolBridge. ToolBridge proposes to employ a collection of general open-access datasets as its raw dataset pool and applies a series of strategies to identify appropriate data entries from the pool for external tool API insertions. By supervised fine-tuning on these curated data entries, LLMs can invoke external tools in appropriate contexts to boost their predictive accuracy, particularly for basic functions including data processing, numerical computation, and factual retrieval. Our experiments rigorously isolate model architectures and training configurations, focusing exclusively on the role of data. The experimental results indicate that LLMs trained on ToolBridge demonstrate consistent performance improvements on both standard benchmarks and custom evaluation datasets. All the associated code and data will be open-source at https://github.com/CharlesPikachu/ToolBridge, promoting transparency and enabling the broader community to explore approaches for equipping LLMs with external tool capabilities. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10872v1-abstract-full').style.display = 'none'; document.getElementById('2410.10872v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">technical report</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.10604">arXiv:2410.10604</a> <span> [<a href="https://arxiv.org/pdf/2410.10604">pdf</a>, <a href="https://arxiv.org/format/2410.10604">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> BrainMVP: Multi-modal Vision Pre-training for Brain Image Analysis using Multi-parametric MRI </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Rui%2C+S">Shaohao Rui</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+L">Lingzhi Chen</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+Z">Zhenyu Tang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+L">Lilong Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+M">Mianxin Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Shaoting Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiaosong Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.10604v1-abstract-short" style="display: inline;"> Accurate diagnosis of brain abnormalities is greatly enhanced by the inclusion of complementary multi-parametric MRI imaging data. There is significant potential to develop a universal pre-training model that can be quickly adapted for image modalities and various clinical scenarios. However, current models often rely on uni-modal image data, neglecting the cross-modal correlations among different… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10604v1-abstract-full').style.display = 'inline'; document.getElementById('2410.10604v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.10604v1-abstract-full" style="display: none;"> Accurate diagnosis of brain abnormalities is greatly enhanced by the inclusion of complementary multi-parametric MRI imaging data. There is significant potential to develop a universal pre-training model that can be quickly adapted for image modalities and various clinical scenarios. However, current models often rely on uni-modal image data, neglecting the cross-modal correlations among different image modalities or struggling to scale up pre-training in the presence of missing modality data. In this paper, we propose BrainMVP, a multi-modal vision pre-training framework for brain image analysis using multi-parametric MRI scans. First, we collect 16,022 brain MRI scans (over 2.4 million images), encompassing eight MRI modalities sourced from a diverse range of centers and devices. Then, a novel pre-training paradigm is proposed for the multi-modal MRI data, addressing the issue of missing modalities and achieving multi-modal information fusion. 
Cross-modal reconstruction is explored to learn distinctive brain image embeddings and efficient modality fusion capabilities. A modality-wise data distillation module is proposed to extract the essence representation of each MR image modality for both the pre-training and downstream application purposes. Furthermore, we introduce a modality-aware contrastive learning module to enhance the cross-modality association within a study. Extensive experiments on downstream tasks demonstrate superior performance compared to state-of-the-art pre-training methods in the medical domain, with Dice Score improvement of 0.28%-14.47% across six segmentation benchmarks and a consistent accuracy improvement of 0.65%-18.07% in four individual classification tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10604v1-abstract-full').style.display = 'none'; document.getElementById('2410.10604v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Liu%2C+M&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Liu%2C+M&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Liu%2C+M&start=50" class="pagination-link " aria-label="Page 2">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Liu%2C+M&start=100" class="pagination-link " aria-label="Page 3">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Liu%2C+M&start=150" class="pagination-link " aria-label="Page 4">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Liu%2C+M&start=200" class="pagination-link " aria-label="Page 5">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> </div> </main> </body> </html>