CINXE.COM
Search | arXiv e-print repository
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 160 results for author: <span class="mathjax">Wei, G</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Wei%2C+G">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Wei, G"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Wei%2C+G&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Wei, G"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Wei%2C+G&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Wei%2C+G&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Wei%2C+G&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Wei%2C+G&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Wei%2C+G&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03738">arXiv:2502.03738</a> <span> [<a href="https://arxiv.org/pdf/2502.03738">pdf</a>, <a href="https://arxiv.org/format/2502.03738">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Scaling Laws in Patchification: An Image Is Worth 50,176 Tokens And More </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+F">Feng Wang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+Y">Yaodong Yu</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+G">Guoyizhe Wei</a>, <a href="/search/cs?searchtype=author&query=Shao%2C+W">Wei Shao</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Y">Yuyin Zhou</a>, <a href="/search/cs?searchtype=author&query=Yuille%2C+A">Alan Yuille</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+C">Cihang Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.03738v1-abstract-short" style="display: inline;"> Since the introduction of Vision Transformer (ViT), patchification has long been regarded as a de facto image tokenization approach for plain visual architectures. By compressing the spatial size of images, this approach can effectively shorten the token sequence and reduce the computational cost of ViT-like plain architectures. In this work, we aim to thoroughly examine the information loss cause… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03738v1-abstract-full').style.display = 'inline'; document.getElementById('2502.03738v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03738v1-abstract-full" style="display: none;"> Since the introduction of Vision Transformer (ViT), patchification has long been regarded as a de facto image tokenization approach for plain visual architectures. By compressing the spatial size of images, this approach can effectively shorten the token sequence and reduce the computational cost of ViT-like plain architectures. In this work, we aim to thoroughly examine the information loss caused by this patchification-based compressive encoding paradigm and how it affects visual understanding. We conduct extensive patch size scaling experiments and excitedly observe an intriguing scaling law in patchification: the models can consistently benefit from decreased patch sizes and attain improved predictive performance, until it reaches the minimum patch size of 1x1, i.e., pixel tokenization. This conclusion is broadly applicable across different vision tasks, various input scales, and diverse architectures such as ViT and the recent Mamba models. Moreover, as a by-product, we discover that with smaller patches, task-specific decoder heads become less critical for dense prediction. In the experiments, we successfully scale up the visual sequence to an exceptional length of 50,176 tokens, achieving a competitive test accuracy of 84.6% with a base-sized model on the ImageNet-1k benchmark. We hope this study can provide insights and theoretical foundations for future works of building non-compressive vision models. Code is available at https://github.com/wangf3014/Patch_Scaling. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03738v1-abstract-full').style.display = 'none'; document.getElementById('2502.03738v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.12599">arXiv:2501.12599</a> <span> [<a href="https://arxiv.org/pdf/2501.12599">pdf</a>, <a href="https://arxiv.org/format/2501.12599">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Kimi k1.5: Scaling Reinforcement Learning with LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kimi+Team"> Kimi Team</a>, <a href="/search/cs?searchtype=author&query=Du%2C+A">Angang Du</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+B">Bofei Gao</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+B">Bowei Xing</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+C">Changjiu Jiang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+C">Cheng Chen</a>, <a href="/search/cs?searchtype=author&query=Li%2C+C">Cheng Li</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+C">Chenjun Xiao</a>, <a href="/search/cs?searchtype=author&query=Du%2C+C">Chenzhuang Du</a>, <a href="/search/cs?searchtype=author&query=Liao%2C+C">Chonghua Liao</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+C">Chuning Tang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Congcong Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+D">Dehao Zhang</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+E">Enming Yuan</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+E">Enzhe Lu</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+F">Fengxiang Tang</a>, <a href="/search/cs?searchtype=author&query=Sung%2C+F">Flood Sung</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+G">Guangda Wei</a>, <a href="/search/cs?searchtype=author&query=Lai%2C+G">Guokun Lai</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+H">Haiqing Guo</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+H">Han Zhu</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+H">Hao Ding</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+H">Hao Hu</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+H">Hao Yang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Hao Zhang</a> , et al. (69 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.12599v1-abstract-short" style="display: inline;"> Language model pretraining with next token prediction has proved effective for scaling compute but is limited to the amount of available training data. Scaling reinforcement learning (RL) unlocks a new axis for the continued improvement of artificial intelligence, with the promise that large language models (LLMs) can scale their training data by learning to explore with rewards. However, prior pu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.12599v1-abstract-full').style.display = 'inline'; document.getElementById('2501.12599v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.12599v1-abstract-full" style="display: none;"> Language model pretraining with next token prediction has proved effective for scaling compute but is limited to the amount of available training data. Scaling reinforcement learning (RL) unlocks a new axis for the continued improvement of artificial intelligence, with the promise that large language models (LLMs) can scale their training data by learning to explore with rewards. However, prior published work has not produced competitive results. In light of this, we report on the training practice of Kimi k1.5, our latest multi-modal LLM trained with RL, including its RL training techniques, multi-modal data recipes, and infrastructure optimization. Long context scaling and improved policy optimization methods are key ingredients of our approach, which establishes a simplistic, effective RL framework without relying on more complex techniques such as Monte Carlo tree search, value functions, and process reward models. Notably, our system achieves state-of-the-art reasoning performance across multiple benchmarks and modalities -- e.g., 77.5 on AIME, 96.2 on MATH 500, 94-th percentile on Codeforces, 74.9 on MathVista -- matching OpenAI's o1. Moreover, we present effective long2short methods that use long-CoT techniques to improve short-CoT models, yielding state-of-the-art short-CoT reasoning results -- e.g., 60.8 on AIME, 94.6 on MATH500, 47.3 on LiveCodeBench -- outperforming existing short-CoT models such as GPT-4o and Claude Sonnet 3.5 by a large margin (up to +550%). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.12599v1-abstract-full').style.display = 'none'; document.getElementById('2501.12599v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">25 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.07139">arXiv:2501.07139</a> <span> [<a href="https://arxiv.org/pdf/2501.07139">pdf</a>, <a href="https://arxiv.org/format/2501.07139">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Performance">cs.PF</span> </div> </div> <p class="title is-5 mathjax"> FlexQuant: Elastic Quantization Framework for Locally Hosted LLM on Edge Devices </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chai%2C+Y">Yuji Chai</a>, <a href="/search/cs?searchtype=author&query=Kwen%2C+M">Mujin Kwen</a>, <a href="/search/cs?searchtype=author&query=Brooks%2C+D">David Brooks</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+G">Gu-Yeon Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.07139v1-abstract-short" style="display: inline;"> Deploying LLMs on edge devices presents serious technical challenges. Memory elasticity is crucial for edge devices with unified memory, where memory is shared and fluctuates dynamically. Existing solutions suffer from either poor transition granularity or high storage costs. We propose FlexQuant, a novel elasticity framework that generates an ensemble of quantized models, providing an elastic hos… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.07139v1-abstract-full').style.display = 'inline'; document.getElementById('2501.07139v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.07139v1-abstract-full" style="display: none;"> Deploying LLMs on edge devices presents serious technical challenges. Memory elasticity is crucial for edge devices with unified memory, where memory is shared and fluctuates dynamically. Existing solutions suffer from either poor transition granularity or high storage costs. We propose FlexQuant, a novel elasticity framework that generates an ensemble of quantized models, providing an elastic hosting solution with 15x granularity improvement and 10x storage reduction compared to SoTA methods. FlexQuant works with most quantization methods and creates a family of trade-off options under various storage limits through our pruning method. It brings great performance and flexibility to the edge deployment of LLMs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.07139v1-abstract-full').style.display = 'none'; document.getElementById('2501.07139v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.02824">arXiv:2501.02824</a> <span> [<a href="https://arxiv.org/pdf/2501.02824">pdf</a>, <a href="https://arxiv.org/format/2501.02824">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Proteomic Learning of Gamma-Aminobutyric Acid (GABA) Receptor-Mediated Anesthesia </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jiang%2C+J">Jian Jiang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+L">Long Chen</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Y">Yueying Zhu</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+Y">Yazhou Shi</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+H">Huahai Qiu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+B">Bengong Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+T">Tianshou Zhou</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.02824v1-abstract-short" style="display: inline;"> Anesthetics are crucial in surgical procedures and therapeutic interventions, but they come with side effects and varying levels of effectiveness, calling for novel anesthetic agents that offer more precise and controllable effects. Targeting Gamma-aminobutyric acid (GABA) receptors, the primary inhibitory receptors in the central nervous system, could enhance their inhibitory action, potentially… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02824v1-abstract-full').style.display = 'inline'; document.getElementById('2501.02824v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.02824v1-abstract-full" style="display: none;"> Anesthetics are crucial in surgical procedures and therapeutic interventions, but they come with side effects and varying levels of effectiveness, calling for novel anesthetic agents that offer more precise and controllable effects. Targeting Gamma-aminobutyric acid (GABA) receptors, the primary inhibitory receptors in the central nervous system, could enhance their inhibitory action, potentially reducing side effects while improving the potency of anesthetics. In this study, we introduce a proteomic learning of GABA receptor-mediated anesthesia based on 24 GABA receptor subtypes by considering over 4000 proteins in protein-protein interaction (PPI) networks and over 1.5 millions known binding compounds. We develop a corresponding drug-target interaction network to identify potential lead compounds for novel anesthetic design. To ensure robust proteomic learning predictions, we curated a dataset comprising 136 targets from a pool of 980 targets within the PPI networks. We employed three machine learning algorithms, integrating advanced natural language processing (NLP) models such as pretrained transformer and autoencoder embeddings. Through a comprehensive screening process, we evaluated the side effects and repurposing potential of over 180,000 drug candidates targeting the GABRA5 receptor. Additionally, we assessed the ADMET (absorption, distribution, metabolism, excretion, and toxicity) properties of these candidates to identify those with near-optimal characteristics. This approach also involved optimizing the structures of existing anesthetics. Our work presents an innovative strategy for the development of new anesthetic drugs, optimization of anesthetic use, and deeper understanding of potential anesthesia-related side effects. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02824v1-abstract-full').style.display = 'none'; document.getElementById('2501.02824v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.19821">arXiv:2412.19821</a> <span> [<a href="https://arxiv.org/pdf/2412.19821">pdf</a>, <a href="https://arxiv.org/format/2412.19821">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Nanoscaling Floating-Point (NxFP): NanoMantissa, Adaptive Microexponents, and Code Recycling for Direct-Cast Compression of Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lo%2C+Y">Yun-Chen Lo</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+G">Gu-Yeon Wei</a>, <a href="/search/cs?searchtype=author&query=Brooks%2C+D">David Brooks</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.19821v1-abstract-short" style="display: inline;"> As cutting-edge large language models (LLMs) continue to transform various industries, their fast-growing model size and sequence length have led to memory traffic and capacity challenges. Recently, AMD, Arm, Intel, Meta, Microsoft, NVIDIA, and Qualcomm have proposed a Microscaling standard (Mx), which augments block floating-point with microexponents to achieve promising perplexity-to-footprint t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.19821v1-abstract-full').style.display = 'inline'; document.getElementById('2412.19821v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.19821v1-abstract-full" style="display: none;"> As cutting-edge large language models (LLMs) continue to transform various industries, their fast-growing model size and sequence length have led to memory traffic and capacity challenges. Recently, AMD, Arm, Intel, Meta, Microsoft, NVIDIA, and Qualcomm have proposed a Microscaling standard (Mx), which augments block floating-point with microexponents to achieve promising perplexity-to-footprint trade-offs. However, the Microscaling suffers from significant perplexity degradation on modern LLMs with less than six bits. This paper profiles modern LLMs and identifies three main challenges of low-bit Microscaling format, i.e., inaccurate tracking of outliers, vacant quantization levels, and wasted binary code. In response, Nanoscaling (NxFP) proposes three techniques, i.e., NanoMantissa, Adaptive Microexponent, and Code Recycling to enable better accuracy and smaller memory footprint than state-of-the-art MxFP. Experimental results on direct-cast inference across various modern LLMs demonstrate that our proposed methods outperform state-of-the-art MxFP by up to 0.64 in perplexity and by up to 30% in accuracy on MMLU benchmarks. Furthermore, NxFP reduces memory footprint by up to 16% while achieving comparable perplexity as MxFP. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.19821v1-abstract-full').style.display = 'none'; document.getElementById('2412.19821v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 12 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2.7; E.4 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.16583">arXiv:2412.16583</a> <span> [<a href="https://arxiv.org/pdf/2412.16583">pdf</a>, <a href="https://arxiv.org/format/2412.16583">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> REO-VLM: Transforming VLM to Meet Regression Challenges in Earth Observation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xue%2C+X">Xizhe Xue</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+G">Guoting Wei</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+H">Hao Chen</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Haokui Zhang</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+F">Feng Lin</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+C">Chunhua Shen</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+X+X">Xiao Xiang Zhu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.16583v1-abstract-short" style="display: inline;"> The rapid evolution of Vision Language Models (VLMs) has catalyzed significant advancements in artificial intelligence, expanding research across various disciplines, including Earth Observation (EO). While VLMs have enhanced image understanding and data processing within EO, their applications have predominantly focused on image content description. This limited focus overlooks their potential in… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.16583v1-abstract-full').style.display = 'inline'; document.getElementById('2412.16583v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.16583v1-abstract-full" style="display: none;"> The rapid evolution of Vision Language Models (VLMs) has catalyzed significant advancements in artificial intelligence, expanding research across various disciplines, including Earth Observation (EO). While VLMs have enhanced image understanding and data processing within EO, their applications have predominantly focused on image content description. This limited focus overlooks their potential in geographic and scientific regression tasks, which are essential for diverse EO applications. To bridge this gap, this paper introduces a novel benchmark dataset, called \textbf{REO-Instruct} to unify regression and generation tasks specifically for the EO domain. Comprising 1.6 million multimodal EO imagery and language pairs, this dataset is designed to support both biomass regression and image content interpretation tasks. Leveraging this dataset, we develop \textbf{REO-VLM}, a groundbreaking model that seamlessly integrates regression capabilities with traditional generative functions. By utilizing language-driven reasoning to incorporate scientific domain knowledge, REO-VLM goes beyond solely relying on EO imagery, enabling comprehensive interpretation of complex scientific attributes from EO data. This approach establishes new performance benchmarks and significantly enhances the capabilities of environmental monitoring and resource management. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.16583v1-abstract-full').style.display = 'none'; document.getElementById('2412.16583v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.19036">arXiv:2411.19036</a> <span> [<a href="https://arxiv.org/pdf/2411.19036">pdf</a>, <a href="https://arxiv.org/format/2411.19036">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> </div> </div> <p class="title is-5 mathjax"> PCDreamer: Point Cloud Completion Through Multi-view Diffusion Priors </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wei%2C+G">Guangshun Wei</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+Y">Yuan Feng</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+L">Long Ma</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Chen Wang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Y">Yuanfeng Zhou</a>, <a href="/search/cs?searchtype=author&query=Li%2C+C">Changjian Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.19036v1-abstract-short" style="display: inline;"> This paper presents PCDreamer, a novel method for point cloud completion. Traditional methods typically extract features from partial point clouds to predict missing regions, but the large solution space often leads to unsatisfactory results. More recent approaches have started to use images as extra guidance, effectively improving performance, but obtaining paired data of images and partial point… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.19036v1-abstract-full').style.display = 'inline'; document.getElementById('2411.19036v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.19036v1-abstract-full" style="display: none;"> This paper presents PCDreamer, a novel method for point cloud completion. Traditional methods typically extract features from partial point clouds to predict missing regions, but the large solution space often leads to unsatisfactory results. More recent approaches have started to use images as extra guidance, effectively improving performance, but obtaining paired data of images and partial point clouds is challenging in practice. To overcome these limitations, we harness the relatively view-consistent multi-view diffusion priors within large models, to generate novel views of the desired shape. The resulting image set encodes both global and local shape cues, which is especially beneficial for shape completion. To fully exploit the priors, we have designed a shape fusion module for producing an initial complete shape from multi-modality input (\ie, images and point clouds), and a follow-up shape consolidation module to obtain the final complete shape by discarding unreliable points introduced by the inconsistency from diffusion priors. Extensive experimental results demonstrate our superior performance, especially in recovering fine details. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.19036v1-abstract-full').style.display = 'none'; document.getElementById('2411.19036v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.18477">arXiv:2410.18477</a> <span> [<a href="https://arxiv.org/pdf/2410.18477">pdf</a>, <a href="https://arxiv.org/format/2410.18477">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Monge-Ampere Regularization for Learning Arbitrary Shapes from Point Clouds </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yang%2C+C">Chuanxiang Yang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Y">Yuanfeng Zhou</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+G">Guangshun Wei</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+L">Long Ma</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+J">Junhui Hou</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yuan Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+W">Wenping Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.18477v1-abstract-short" style="display: inline;"> As commonly used implicit geometry representations, the signed distance function (SDF) is limited to modeling watertight shapes, while the unsigned distance function (UDF) is capable of representing various surfaces. However, its inherent theoretical shortcoming, i.e., the non-differentiability at the zero level set, would result in sub-optimal reconstruction quality. In this paper, we propose the… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18477v1-abstract-full').style.display = 'inline'; document.getElementById('2410.18477v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.18477v1-abstract-full" style="display: none;"> As commonly used implicit geometry representations, the signed distance function (SDF) is limited to modeling watertight shapes, while the unsigned distance function (UDF) is capable of representing various surfaces. However, its inherent theoretical shortcoming, i.e., the non-differentiability at the zero level set, would result in sub-optimal reconstruction quality. In this paper, we propose the scaled-squared distance function (S$^{2}$DF), a novel implicit surface representation for modeling arbitrary surface types. S$^{2}$DF does not distinguish between inside and outside regions while effectively addressing the non-differentiability issue of UDF at the zero level set. We demonstrate that S$^{2}$DF satisfies a second-order partial differential equation of Monge-Ampere-type, allowing us to develop a learning pipeline that leverages a novel Monge-Ampere regularization to directly learn S$^{2}$DF from raw unoriented point clouds without supervision from ground-truth S$^{2}$DF values. Extensive experiments across multiple datasets show that our method significantly outperforms state-of-the-art supervised approaches that require ground-truth surface information as supervision for training. The code will be publicly available at https://github.com/chuanxiang-yang/S2DF. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.18477v1-abstract-full').style.display = 'none'; document.getElementById('2410.18477v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.17283">arXiv:2410.17283</a> <span> [<a href="https://arxiv.org/pdf/2410.17283">pdf</a>, <a href="https://arxiv.org/format/2410.17283">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Advancements in Visual Language Models for Remote Sensing: Datasets, Capabilities, and Enhancement Techniques </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tao%2C+L">Lijie Tao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Haokui Zhang</a>, <a href="/search/cs?searchtype=author&query=Jing%2C+H">Haizhao Jing</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yu Liu</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+D">Dawei Yan</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+G">Guoting Wei</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+X">Xizhe Xue</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.17283v3-abstract-short" style="display: inline;"> Recently, the remarkable success of ChatGPT has sparked a renewed wave of interest in artificial intelligence (AI), and the advancements in visual language models (VLMs) have pushed this enthusiasm to new heights. Differring from previous AI approaches that generally formulated different tasks as discriminative models, VLMs frame tasks as generative models and align language with visual informatio… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.17283v3-abstract-full').style.display = 'inline'; document.getElementById('2410.17283v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.17283v3-abstract-full" style="display: none;"> Recently, the remarkable success of ChatGPT has sparked a renewed wave of interest in artificial intelligence (AI), and the advancements in visual language models (VLMs) have pushed this enthusiasm to new heights. Differring from previous AI approaches that generally formulated different tasks as discriminative models, VLMs frame tasks as generative models and align language with visual information, enabling the handling of more challenging problems. The remote sensing (RS) field, a highly practical domain, has also embraced this new trend and introduced several VLM-based RS methods that have demonstrated promising performance and enormous potential. In this paper, we first review the fundamental theories related to VLM, then summarize the datasets constructed for VLMs in remote sensing and the various tasks they addressed. Finally, we categorize the improvement methods into three main parts according to the core components of VLMs and provide a detailed introduction and comparison of these methods. A project associated with this review has been created at https://github.com/taolijie11111/VLMs-in-RS-review. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.17283v3-abstract-full').style.display = 'none'; document.getElementById('2410.17283v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.07599">arXiv:2410.07599</a> <span> [<a href="https://arxiv.org/pdf/2410.07599">pdf</a>, <a href="https://arxiv.org/format/2410.07599">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Causal Image Modeling for Efficient Visual Understanding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+F">Feng Wang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+T">Timing Yang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+Y">Yaodong Yu</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+S">Sucheng Ren</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+G">Guoyizhe Wei</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+A">Angtian Wang</a>, <a href="/search/cs?searchtype=author&query=Shao%2C+W">Wei Shao</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Y">Yuyin Zhou</a>, <a href="/search/cs?searchtype=author&query=Yuille%2C+A">Alan Yuille</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+C">Cihang Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.07599v1-abstract-short" style="display: inline;"> In this work, we present a comprehensive analysis of causal image modeling and introduce the Adventurer series models where we treat images as sequences of patch tokens and employ uni-directional language models to learn visual representations. This modeling paradigm allows us to process images in a recurrent formulation with linear complexity relative to the sequence length, which can effectively… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.07599v1-abstract-full').style.display = 'inline'; document.getElementById('2410.07599v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.07599v1-abstract-full" style="display: none;"> In this work, we present a comprehensive analysis of causal image modeling and introduce the Adventurer series models where we treat images as sequences of patch tokens and employ uni-directional language models to learn visual representations. This modeling paradigm allows us to process images in a recurrent formulation with linear complexity relative to the sequence length, which can effectively address the memory and computation explosion issues posed by high-resolution and fine-grained images. In detail, we introduce two simple designs that seamlessly integrate image inputs into the causal inference framework: a global pooling token placed at the beginning of the sequence and a flipping operation between every two layers. Extensive empirical studies demonstrate the significant efficiency and effectiveness of this causal image modeling paradigm. For example, our base-sized Adventurer model attains a competitive test accuracy of 84.0% on the standard ImageNet-1k benchmark with 216 images/s training throughput, which is 5.3 times more efficient than vision transformers to achieve the same result. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.07599v1-abstract-full').style.display = 'none'; document.getElementById('2410.07599v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.20423">arXiv:2409.20423</a> <span> [<a href="https://arxiv.org/pdf/2409.20423">pdf</a>, <a href="https://arxiv.org/format/2409.20423">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Stream-level flow matching with Gaussian processes </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wei%2C+G">Ganchao Wei</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+L">Li Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.20423v5-abstract-short" style="display: inline;"> Flow matching (FM) is a family of training algorithms for fitting continuous normalizing flows (CNFs). Conditional flow matching (CFM) exploits the fact that the marginal vector field of a CNF can be learned by fitting least-squares regression to the conditional vector field specified given one or both ends of the flow path. In this paper, we extend the CFM algorithm by defining conditional probab… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.20423v5-abstract-full').style.display = 'inline'; document.getElementById('2409.20423v5-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.20423v5-abstract-full" style="display: none;"> Flow matching (FM) is a family of training algorithms for fitting continuous normalizing flows (CNFs). Conditional flow matching (CFM) exploits the fact that the marginal vector field of a CNF can be learned by fitting least-squares regression to the conditional vector field specified given one or both ends of the flow path. In this paper, we extend the CFM algorithm by defining conditional probability paths along ``streams'', instances of latent stochastic paths that connect data pairs of source and target, which are modeled with Gaussian process (GP) distributions. The unique distributional properties of GPs help preserve the ``simulation-free" nature of CFM training. We show that this generalization of the CFM can effectively reduce the variance in the estimated marginal vector field at a moderate computational cost, thereby improving the quality of the generated samples under common metrics. Additionally, adopting the GP on the streams allows for flexibly linking multiple correlated training data points (e.g., time series). We empirically validate our claim through both simulations and applications to image and neural time series data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.20423v5-abstract-full').style.display = 'none'; document.getElementById('2409.20423v5-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.14535">arXiv:2409.14535</a> <span> [<a href="https://arxiv.org/pdf/2409.14535">pdf</a>, <a href="https://arxiv.org/format/2409.14535">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Hyper-parameter Optimization for Wireless Network Traffic Prediction Models with A Novel Meta-Learning Framework </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+L">Liangzhi Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jie Zhang</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+Y">Yuan Gao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jiliang Zhang</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+G">Guiyi Wei</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+H">Haibo Zhou</a>, <a href="/search/cs?searchtype=author&query=Zhuge%2C+B">Bin Zhuge</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zitian Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.14535v1-abstract-short" style="display: inline;"> In this paper, we propose a novel meta-learning based hyper-parameter optimization framework for wireless network traffic prediction models. An attention-based deep neural network (ADNN) is adopted as the prediction model, i.e., base-learner, for each wireless network traffic prediction task, namely base-task, and a meta-learner is employed to automatically generate the optimal hyper-parameters fo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.14535v1-abstract-full').style.display = 'inline'; document.getElementById('2409.14535v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.14535v1-abstract-full" style="display: none;"> In this paper, we propose a novel meta-learning based hyper-parameter optimization framework for wireless network traffic prediction models. An attention-based deep neural network (ADNN) is adopted as the prediction model, i.e., base-learner, for each wireless network traffic prediction task, namely base-task, and a meta-learner is employed to automatically generate the optimal hyper-parameters for a given base-learner according to the corresponding base-task's intrinsic characteristics or properties, i.e., meta-features. Based on our observation from real-world traffic records that base-tasks possessing similar meta-features tend to favour similar hyper-parameters for their base-learners, the meta-learner exploits a K-nearest neighbor (KNN) learning method to obtain a set of candidate hyper-parameter selection strategies for a new base-learner, which are then utilized by an advanced genetic algorithm with intelligent chromosome screening to finally acquire the best hyper-parameter selection strategy. Extensive experiments demonstrate that base-learners in the proposed framework have high potential prediction ability for wireless network traffic prediction task, and the meta-learner can enormously elevate the base-learners' performance by providing them the optimal hyper-parameters. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.14535v1-abstract-full').style.display = 'none'; document.getElementById('2409.14535v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This work has been submitted to the IEEE for possible publication</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.12246">arXiv:2408.12246</a> <span> [<a href="https://arxiv.org/pdf/2408.12246">pdf</a>, <a href="https://arxiv.org/format/2408.12246">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> OVA-DETR: Open Vocabulary Aerial Object Detection Using Image-Text Alignment and Fusion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wei%2C+G">Guoting Wei</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+X">Xia Yuan</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yu Liu</a>, <a href="/search/cs?searchtype=author&query=Shang%2C+Z">Zhenhao Shang</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+K">Kelu Yao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+C">Chao Li</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+Q">Qingsen Yan</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+C">Chunxia Zhao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Haokui Zhang</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+R">Rong Xiao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.12246v1-abstract-short" style="display: inline;"> Aerial object detection has been a hot topic for many years due to its wide application requirements. However, most existing approaches can only handle predefined categories, which limits their applicability for the open scenarios in real-world. In this paper, we extend aerial object detection to open scenarios by exploiting the relationship between image and text, and propose OVA-DETR, a high-eff… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.12246v1-abstract-full').style.display = 'inline'; document.getElementById('2408.12246v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.12246v1-abstract-full" style="display: none;"> Aerial object detection has been a hot topic for many years due to its wide application requirements. However, most existing approaches can only handle predefined categories, which limits their applicability for the open scenarios in real-world. In this paper, we extend aerial object detection to open scenarios by exploiting the relationship between image and text, and propose OVA-DETR, a high-efficiency open-vocabulary detector for aerial images. Specifically, based on the idea of image-text alignment, we propose region-text contrastive loss to replace the category regression loss in the traditional detection framework, which breaks the category limitation. Then, we propose Bidirectional Vision-Language Fusion (Bi-VLF), which includes a dual-attention fusion encoder and a multi-level text-guided Fusion Decoder. The dual-attention fusion encoder enhances the feature extraction process in the encoder part. The multi-level text-guided Fusion Decoder is designed to improve the detection ability for small objects, which frequently appear in aerial object detection scenarios. Experimental results on three widely used benchmark datasets show that our proposed method significantly improves the mAP and recall, while enjoying faster inference speed. For instance, in zero shot detection experiments on DIOR, the proposed OVA-DETR outperforms DescReg and YOLO-World by 37.4% and 33.1%, respectively, while achieving 87 FPS inference speed, which is 7.9x faster than DescReg and 3x faster than YOLO-world. The code is available at https://github.com/GT-Wei/OVA-DETR. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.12246v1-abstract-full').style.display = 'none'; document.getElementById('2408.12246v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.01246">arXiv:2408.01246</a> <span> [<a href="https://arxiv.org/pdf/2408.01246">pdf</a>, <a href="https://arxiv.org/format/2408.01246">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> MapComp: A Secure View-based Collaborative Analytics Framework for Join-Group-Aggregation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Peng%2C+X">Xinyu Peng</a>, <a href="/search/cs?searchtype=author&query=Han%2C+F">Feng Han</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+L">Li Peng</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+W">Weiran Liu</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+Z">Zheng Yan</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+K">Kai Kang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xinyuan Zhang</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+G">Guoxing Wei</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+J">Jianling Sun</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jinfei Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.01246v3-abstract-short" style="display: inline;"> This paper introduces MapComp, a novel view-based framework to facilitate join-group-aggregation (JGA) queries for collaborative analytics. Through specially crafted materialized view for join and novel design of group-aggregation (GA) protocols, MapComp removes duplicated join workload and expedites subsequent GA, improving the efficiency of JGA query execution. To support continuous data updates… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.01246v3-abstract-full').style.display = 'inline'; document.getElementById('2408.01246v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.01246v3-abstract-full" style="display: none;"> This paper introduces MapComp, a novel view-based framework to facilitate join-group-aggregation (JGA) queries for collaborative analytics. Through specially crafted materialized view for join and novel design of group-aggregation (GA) protocols, MapComp removes duplicated join workload and expedites subsequent GA, improving the efficiency of JGA query execution. To support continuous data updates, our materialized view offers payload-independence feature and brings in significant efficiency improvement of view refreshing with free MPC overhead. This feature also allows further acceleration for GA, where we devised multiple novel protocols that outperform prior works. Notably, our work represents the first endeavor to expedite secure collaborative JGA queries using materialized views. Our experiments demonstrate a significant advantage of MapComp, achieving up to a 2189.9x efficiency improvement compared to the non-view based baseline when executing queries eight times. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.01246v3-abstract-full').style.display = 'none'; document.getElementById('2408.01246v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.00220">arXiv:2408.00220</a> <span> [<a href="https://arxiv.org/pdf/2408.00220">pdf</a>, <a href="https://arxiv.org/format/2408.00220">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Differential Geometry">math.DG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Persistent de Rham-Hodge Laplacians in Eulerian representation for manifold topological learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Su%2C+Z">Zhe Su</a>, <a href="/search/cs?searchtype=author&query=Tong%2C+Y">Yiying Tong</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.00220v2-abstract-short" style="display: inline;"> Recently, topological data analysis has become a trending topic in data science and engineering. However, the key technique of topological data analysis, i.e., persistent homology, is defined on point cloud data, which does not work directly for data on manifolds. Although earlier evolutionary de Rham-Hodge theory deals with data on manifolds, it is inconvenient for machine learning applications b… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.00220v2-abstract-full').style.display = 'inline'; document.getElementById('2408.00220v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.00220v2-abstract-full" style="display: none;"> Recently, topological data analysis has become a trending topic in data science and engineering. However, the key technique of topological data analysis, i.e., persistent homology, is defined on point cloud data, which does not work directly for data on manifolds. Although earlier evolutionary de Rham-Hodge theory deals with data on manifolds, it is inconvenient for machine learning applications because of the numerical inconsistency caused by remeshing the involving manifolds in the Lagrangian representation. In this work, we introduce persistent de Rham-Hodge Laplacian, or persistent Hodge Laplacian (PHL) as an abbreviation, for manifold topological learning. Our PHLs are constructed in the Eulerian representation via structure-persevering Cartesian grids, avoiding the numerical inconsistency over the multiscale manifolds. To facilitate the manifold topological learning, we propose a persistent Hodge Laplacian learning algorithm for data on manifolds or volumetric data. As a proof-of-principle application of the proposed manifold topological learning model, we consider the prediction of protein-ligand binding affinities with two benchmark datasets. Our numerical experiments highlight the power and promise of the proposed method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.00220v2-abstract-full').style.display = 'none'; document.getElementById('2408.00220v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.00118">arXiv:2408.00118</a> <span> [<a href="https://arxiv.org/pdf/2408.00118">pdf</a>, <a href="https://arxiv.org/format/2408.00118">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Gemma 2: Improving Open Language Models at a Practical Size </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gemma+Team"> Gemma Team</a>, <a href="/search/cs?searchtype=author&query=Riviere%2C+M">Morgane Riviere</a>, <a href="/search/cs?searchtype=author&query=Pathak%2C+S">Shreya Pathak</a>, <a href="/search/cs?searchtype=author&query=Sessa%2C+P+G">Pier Giuseppe Sessa</a>, <a href="/search/cs?searchtype=author&query=Hardin%2C+C">Cassidy Hardin</a>, <a href="/search/cs?searchtype=author&query=Bhupatiraju%2C+S">Surya Bhupatiraju</a>, <a href="/search/cs?searchtype=author&query=Hussenot%2C+L">L茅onard Hussenot</a>, <a href="/search/cs?searchtype=author&query=Mesnard%2C+T">Thomas Mesnard</a>, <a href="/search/cs?searchtype=author&query=Shahriari%2C+B">Bobak Shahriari</a>, <a href="/search/cs?searchtype=author&query=Ram%C3%A9%2C+A">Alexandre Ram茅</a>, <a href="/search/cs?searchtype=author&query=Ferret%2C+J">Johan Ferret</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+P">Peter Liu</a>, <a href="/search/cs?searchtype=author&query=Tafti%2C+P">Pouya Tafti</a>, <a href="/search/cs?searchtype=author&query=Friesen%2C+A">Abe Friesen</a>, <a href="/search/cs?searchtype=author&query=Casbon%2C+M">Michelle Casbon</a>, <a href="/search/cs?searchtype=author&query=Ramos%2C+S">Sabela Ramos</a>, <a href="/search/cs?searchtype=author&query=Kumar%2C+R">Ravin Kumar</a>, <a href="/search/cs?searchtype=author&query=Lan%2C+C+L">Charline Le Lan</a>, <a href="/search/cs?searchtype=author&query=Jerome%2C+S">Sammy Jerome</a>, <a href="/search/cs?searchtype=author&query=Tsitsulin%2C+A">Anton Tsitsulin</a>, <a href="/search/cs?searchtype=author&query=Vieillard%2C+N">Nino Vieillard</a>, <a href="/search/cs?searchtype=author&query=Stanczyk%2C+P">Piotr Stanczyk</a>, <a href="/search/cs?searchtype=author&query=Girgin%2C+S">Sertan Girgin</a>, <a href="/search/cs?searchtype=author&query=Momchev%2C+N">Nikola Momchev</a>, <a href="/search/cs?searchtype=author&query=Hoffman%2C+M">Matt Hoffman</a> , et al. (173 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.00118v3-abstract-short" style="display: inline;"> In this work, we introduce Gemma 2, a new addition to the Gemma family of lightweight, state-of-the-art open models, ranging in scale from 2 billion to 27 billion parameters. In this new version, we apply several known technical modifications to the Transformer architecture, such as interleaving local-global attentions (Beltagy et al., 2020a) and group-query attention (Ainslie et al., 2023). We al… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.00118v3-abstract-full').style.display = 'inline'; document.getElementById('2408.00118v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.00118v3-abstract-full" style="display: none;"> In this work, we introduce Gemma 2, a new addition to the Gemma family of lightweight, state-of-the-art open models, ranging in scale from 2 billion to 27 billion parameters. In this new version, we apply several known technical modifications to the Transformer architecture, such as interleaving local-global attentions (Beltagy et al., 2020a) and group-query attention (Ainslie et al., 2023). We also train the 2B and 9B models with knowledge distillation (Hinton et al., 2015) instead of next token prediction. The resulting models deliver the best performance for their size, and even offer competitive alternatives to models that are 2-3 times bigger. We release all our models to the community. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.00118v3-abstract-full').style.display = 'none'; document.getElementById('2408.00118v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.11434">arXiv:2406.11434</a> <span> [<a href="https://arxiv.org/pdf/2406.11434">pdf</a>, <a href="https://arxiv.org/format/2406.11434">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> </div> </div> <p class="title is-5 mathjax"> DB-GPT-Hub: Towards Open Benchmarking Text-to-SQL Empowered by Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhou%2C+F">Fan Zhou</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+S">Siqiao Xue</a>, <a href="/search/cs?searchtype=author&query=Qi%2C+D">Danrui Qi</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+W">Wenhui Shi</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+W">Wang Zhao</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+G">Ganglin Wei</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Hongyang Zhang</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+C">Caigai Jiang</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+G">Gangwei Jiang</a>, <a href="/search/cs?searchtype=author&query=Chu%2C+Z">Zhixuan Chu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+F">Faqiang Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.11434v1-abstract-short" style="display: inline;"> Large language models (LLMs) becomes the dominant paradigm for the challenging task of text-to-SQL. LLM-empowered text-to-SQL methods are typically categorized into prompting-based and tuning approaches. Compared to prompting-based methods, benchmarking fine-tuned LLMs for text-to-SQL is important yet under-explored, partially attributed to the prohibitively high computational cost. In this paper,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.11434v1-abstract-full').style.display = 'inline'; document.getElementById('2406.11434v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.11434v1-abstract-full" style="display: none;"> Large language models (LLMs) becomes the dominant paradigm for the challenging task of text-to-SQL. LLM-empowered text-to-SQL methods are typically categorized into prompting-based and tuning approaches. Compared to prompting-based methods, benchmarking fine-tuned LLMs for text-to-SQL is important yet under-explored, partially attributed to the prohibitively high computational cost. In this paper, we present DB-GPT-Hub, an open benchmark suite for LLM-empowered text-to-SQL, which primarily focuses on tuning LLMs at large scales. The proposed benchmark consists of: 1. a standardized and comprehensive evaluation of text-to-SQL tasks by fine-tuning medium to large-sized open LLMs; 2. a modularized and easy-to-extend codebase with mainstream LLMs and experimental scenarios supported, which prioritizes fine-tuning methods but can be easily extended to prompt-based setting. Our work investigates the potential gains and the performance boundaries of tuning approaches, compared to prompting approaches and explores optimal solutions tailored to specific scenarios. We hope DB-GPT-Hub, along with these findings, enables further research and broad applications that would otherwise be difficult owing to the absence of a dedicated open benchmark. The project code has been released at https://github.com/eosphoros-ai/DB-GPT-Hub. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.11434v1-abstract-full').style.display = 'none'; document.getElementById('2406.11434v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.10126">arXiv:2406.10126</a> <span> [<a href="https://arxiv.org/pdf/2406.10126">pdf</a>, <a href="https://arxiv.org/format/2406.10126">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Training-free Camera Control for Video Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hou%2C+C">Chen Hou</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+G">Guoqiang Wei</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+Y">Yan Zeng</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zhibo Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.10126v3-abstract-short" style="display: inline;"> We propose a training-free and robust solution to offer camera movement control for off-the-shelf video diffusion models. Unlike previous work, our method does not require any supervised finetuning on camera-annotated datasets or self-supervised training via data augmentation. Instead, it can be plugged and played with most pretrained video diffusion models and generate camera controllable videos… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.10126v3-abstract-full').style.display = 'inline'; document.getElementById('2406.10126v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.10126v3-abstract-full" style="display: none;"> We propose a training-free and robust solution to offer camera movement control for off-the-shelf video diffusion models. Unlike previous work, our method does not require any supervised finetuning on camera-annotated datasets or self-supervised training via data augmentation. Instead, it can be plugged and played with most pretrained video diffusion models and generate camera controllable videos with a single image or text prompt as input. The inspiration of our work comes from the layout prior that intermediate latents hold towards generated results, thus rearranging noisy pixels in them will make output content reallocated as well. As camera move could also be seen as a kind of pixel rearrangement caused by perspective change, videos could be reorganized following specific camera motion if their noisy latents change accordingly. Established on this, we propose our method CamTrol, which enables robust camera control for video diffusion models. It is achieved by a two-stage process. First, we model image layout rearrangement through explicit camera movement in 3D point cloud space. Second, we generate videos with camera motion using layout prior of noisy latents formed by a series of rearranged images. Extensive experiments have demonstrated the robustness our method holds in controlling camera motion of generated videos. Furthermore, we show that our method can produce impressive results in generating 3D rotation videos with dynamic content. Project page at https://lifedecoder.github.io/CamTrol/. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.10126v3-abstract-full').style.display = 'none'; document.getElementById('2406.10126v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.06479">arXiv:2406.06479</a> <span> [<a href="https://arxiv.org/pdf/2406.06479">pdf</a>, <a href="https://arxiv.org/format/2406.06479">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> Graph-Based Bidirectional Transformer Decision Threshold Adjustment Algorithm for Class-Imbalanced Molecular Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hayes%2C+N">Nicole Hayes</a>, <a href="/search/cs?searchtype=author&query=Merkurjev%2C+E">Ekaterina Merkurjev</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.06479v3-abstract-short" style="display: inline;"> Data sets with imbalanced class sizes, where one class size is much smaller than that of others, occur exceedingly often in many applications, including those with biological foundations, such as disease diagnosis and drug discovery. Therefore, it is extremely important to be able to identify data elements of classes of various sizes, as a failure to do so can result in heavy costs. Nonetheless, m… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.06479v3-abstract-full').style.display = 'inline'; document.getElementById('2406.06479v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.06479v3-abstract-full" style="display: none;"> Data sets with imbalanced class sizes, where one class size is much smaller than that of others, occur exceedingly often in many applications, including those with biological foundations, such as disease diagnosis and drug discovery. Therefore, it is extremely important to be able to identify data elements of classes of various sizes, as a failure to do so can result in heavy costs. Nonetheless, many data classification procedures do not perform well on imbalanced data sets as they often fail to detect elements belonging to underrepresented classes. In this work, we propose the BTDT-MBO algorithm, incorporating Merriman-Bence-Osher (MBO) approaches and a bidirectional transformer, as well as distance correlation and decision threshold adjustments, for data classification tasks on highly imbalanced molecular data sets, where the sizes of the classes vary greatly. The proposed technique not only integrates adjustments in the classification threshold for the MBO algorithm in order to help deal with the class imbalance, but also uses a bidirectional transformer procedure based on an attention mechanism for self-supervised learning. In addition, the model implements distance correlation as a weight function for the similarity graph-based framework on which the adjusted MBO algorithm operates. The proposed method is validated using six molecular data sets and compared to other related techniques. The computational experiments show that the proposed technique is superior to competing approaches even in the case of a high class imbalance ratio. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.06479v3-abstract-full').style.display = 'none'; document.getElementById('2406.06479v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.00492">arXiv:2406.00492</a> <span> [<a href="https://arxiv.org/pdf/2406.00492">pdf</a>, <a href="https://arxiv.org/format/2406.00492">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> SAM-VMNet: Deep Neural Networks For Coronary Angiography Vessel Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zeng%2C+X">Xueying Zeng</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+B">Baixiang Huang</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+Y">Yu Luo</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+G">Guangyu Wei</a>, <a href="/search/cs?searchtype=author&query=He%2C+S">Songyan He</a>, <a href="/search/cs?searchtype=author&query=Shao%2C+Y">Yushuang Shao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.00492v1-abstract-short" style="display: inline;"> Coronary artery disease (CAD) is one of the most prevalent diseases in the cardiovascular field and one of the major contributors to death worldwide. Computed Tomography Angiography (CTA) images are regarded as the authoritative standard for the diagnosis of coronary artery disease, and by performing vessel segmentation and stenosis detection on CTA images, physicians are able to diagnose coronary… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.00492v1-abstract-full').style.display = 'inline'; document.getElementById('2406.00492v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.00492v1-abstract-full" style="display: none;"> Coronary artery disease (CAD) is one of the most prevalent diseases in the cardiovascular field and one of the major contributors to death worldwide. Computed Tomography Angiography (CTA) images are regarded as the authoritative standard for the diagnosis of coronary artery disease, and by performing vessel segmentation and stenosis detection on CTA images, physicians are able to diagnose coronary artery disease more accurately. In order to combine the advantages of both the base model and the domain-specific model, and to achieve high-precision and fully-automatic segmentation and detection with a limited number of training samples, we propose a novel architecture, SAM-VMNet, which combines the powerful feature extraction capability of MedSAM with the advantage of the linear complexity of the visual state-space model of VM-UNet, giving it faster inferences than Vision Transformer with faster inference speed and stronger data processing capability, achieving higher segmentation accuracy and stability for CTA images. Experimental results show that the SAM-VMNet architecture performs excellently in the CTA image segmentation task, with a segmentation accuracy of up to 98.32% and a sensitivity of up to 99.33%, which is significantly better than other existing models and has stronger domain adaptability. Comprehensive evaluation of the CTA image segmentation task shows that SAM-VMNet accurately extracts the vascular trunks and capillaries, demonstrating its great potential and wide range of application scenarios for the vascular segmentation task, and also laying a solid foundation for further stenosis detection. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.00492v1-abstract-full').style.display = 'none'; document.getElementById('2406.00492v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.14858">arXiv:2405.14858</a> <span> [<a href="https://arxiv.org/pdf/2405.14858">pdf</a>, <a href="https://arxiv.org/format/2405.14858">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Mamba-R: Vision Mamba ALSO Needs Registers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+F">Feng Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jiahao Wang</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+S">Sucheng Ren</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+G">Guoyizhe Wei</a>, <a href="/search/cs?searchtype=author&query=Mei%2C+J">Jieru Mei</a>, <a href="/search/cs?searchtype=author&query=Shao%2C+W">Wei Shao</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Y">Yuyin Zhou</a>, <a href="/search/cs?searchtype=author&query=Yuille%2C+A">Alan Yuille</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+C">Cihang Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.14858v1-abstract-short" style="display: inline;"> Similar to Vision Transformers, this paper identifies artifacts also present within the feature maps of Vision Mamba. These artifacts, corresponding to high-norm tokens emerging in low-information background areas of images, appear much more severe in Vision Mamba -- they exist prevalently even with the tiny-sized model and activate extensively across background regions. To mitigate this issue, we… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.14858v1-abstract-full').style.display = 'inline'; document.getElementById('2405.14858v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.14858v1-abstract-full" style="display: none;"> Similar to Vision Transformers, this paper identifies artifacts also present within the feature maps of Vision Mamba. These artifacts, corresponding to high-norm tokens emerging in low-information background areas of images, appear much more severe in Vision Mamba -- they exist prevalently even with the tiny-sized model and activate extensively across background regions. To mitigate this issue, we follow the prior solution of introducing register tokens into Vision Mamba. To better cope with Mamba blocks' uni-directional inference paradigm, two key modifications are introduced: 1) evenly inserting registers throughout the input token sequence, and 2) recycling registers for final decision predictions. We term this new architecture Mamba-R. Qualitative observations suggest, compared to vanilla Vision Mamba, Mamba-R's feature maps appear cleaner and more focused on semantically meaningful regions. Quantitatively, Mamba-R attains stronger performance and scales better. For example, on the ImageNet benchmark, our base-size Mamba-R attains 82.9% accuracy, significantly outperforming Vim-B's 81.8%; furthermore, we provide the first successful scaling to the large model size (i.e., with 341M parameters), attaining a competitive accuracy of 83.2% (84.5% if finetuned with 384x384 inputs). Additional validation on the downstream semantic segmentation task also supports Mamba-R's efficacy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.14858v1-abstract-full').style.display = 'none'; document.getElementById('2405.14858v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.13858">arXiv:2405.13858</a> <span> [<a href="https://arxiv.org/pdf/2405.13858">pdf</a>, <a href="https://arxiv.org/format/2405.13858">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Emerging Technologies">cs.ET</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Carbon Connect: An Ecosystem for Sustainable Computing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lee%2C+B+C">Benjamin C. Lee</a>, <a href="/search/cs?searchtype=author&query=Brooks%2C+D">David Brooks</a>, <a href="/search/cs?searchtype=author&query=van+Benthem%2C+A">Arthur van Benthem</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+U">Udit Gupta</a>, <a href="/search/cs?searchtype=author&query=Hills%2C+G">Gage Hills</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+V">Vincent Liu</a>, <a href="/search/cs?searchtype=author&query=Pierce%2C+B">Benjamin Pierce</a>, <a href="/search/cs?searchtype=author&query=Stewart%2C+C">Christopher Stewart</a>, <a href="/search/cs?searchtype=author&query=Strubell%2C+E">Emma Strubell</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+G">Gu-Yeon Wei</a>, <a href="/search/cs?searchtype=author&query=Wierman%2C+A">Adam Wierman</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+Y">Yuan Yao</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+M">Minlan Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.13858v2-abstract-short" style="display: inline;"> Computing is at a moment of profound opportunity. Emerging applications -- such as capable artificial intelligence, immersive virtual realities, and pervasive sensor systems -- drive unprecedented demand for computer. Despite recent advances toward net zero carbon emissions, the computing industry's gross energy usage continues to rise at an alarming rate, outpacing the growth of new energy instal… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.13858v2-abstract-full').style.display = 'inline'; document.getElementById('2405.13858v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.13858v2-abstract-full" style="display: none;"> Computing is at a moment of profound opportunity. Emerging applications -- such as capable artificial intelligence, immersive virtual realities, and pervasive sensor systems -- drive unprecedented demand for computer. Despite recent advances toward net zero carbon emissions, the computing industry's gross energy usage continues to rise at an alarming rate, outpacing the growth of new energy installations and renewable energy deployments. A shift towards sustainability is needed to spark a transformation in how computer systems are manufactured, allocated, and consumed. Carbon Connect envisions coordinated research thrusts that produce design and management strategies for sustainable, next-generation computer systems. These strategies must flatten and then reverse growth trajectories for computing power and carbon for society's most rapidly growing applications such as artificial intelligence and virtual spaces. We will require accurate models for carbon accounting in computing technology. For embodied carbon, we must re-think conventional design strategies -- over-provisioned monolithic servers, frequent hardware refresh cycles, custom silicon -- and adopt life-cycle design strategies that more effectively reduce, reuse and recycle hardware at scale. For operational carbon, we must not only embrace renewable energy but also design systems to use that energy more efficiently. Finally, new hardware design and management strategies must be cognizant of economic policy and regulatory landscape, aligning private initiatives with societal goals. Many of these broader goals will require computer scientists to develop deep, enduring collaborations with researchers in economics, law, and industrial ecology to spark change in broader practice. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.13858v2-abstract-full').style.display = 'none'; document.getElementById('2405.13858v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.11461">arXiv:2405.11461</a> <span> [<a href="https://arxiv.org/pdf/2405.11461">pdf</a>, <a href="https://arxiv.org/format/2405.11461">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> DocReLM: Mastering Document Retrieval with Language Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wei%2C+G">Gengchen Wei</a>, <a href="/search/cs?searchtype=author&query=Pang%2C+X">Xinle Pang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+T">Tianning Zhang</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Y">Yu Sun</a>, <a href="/search/cs?searchtype=author&query=Qian%2C+X">Xun Qian</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+C">Chen Lin</a>, <a href="/search/cs?searchtype=author&query=Zhong%2C+H">Han-Sen Zhong</a>, <a href="/search/cs?searchtype=author&query=Ouyang%2C+W">Wanli Ouyang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.11461v1-abstract-short" style="display: inline;"> With over 200 million published academic documents and millions of new documents being written each year, academic researchers face the challenge of searching for information within this vast corpus. However, existing retrieval systems struggle to understand the semantics and domain knowledge present in academic papers. In this work, we demonstrate that by utilizing large language models, a docume… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.11461v1-abstract-full').style.display = 'inline'; document.getElementById('2405.11461v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.11461v1-abstract-full" style="display: none;"> With over 200 million published academic documents and millions of new documents being written each year, academic researchers face the challenge of searching for information within this vast corpus. However, existing retrieval systems struggle to understand the semantics and domain knowledge present in academic papers. In this work, we demonstrate that by utilizing large language models, a document retrieval system can achieve advanced semantic understanding capabilities, significantly outperforming existing systems. Our approach involves training the retriever and reranker using domain-specific data generated by large language models. Additionally, we utilize large language models to identify candidates from the references of retrieved papers to further enhance the performance. We use a test set annotated by academic researchers in the fields of quantum physics and computer vision to evaluate our system's performance. The results show that DocReLM achieves a Top 10 accuracy of 44.12% in computer vision, compared to Google Scholar's 15.69%, and an increase to 36.21% in quantum physics, while that of Google Scholar is 12.96%. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.11461v1-abstract-full').style.display = 'none'; document.getElementById('2405.11461v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.03987">arXiv:2405.03987</a> <span> [<a href="https://arxiv.org/pdf/2405.03987">pdf</a>, <a href="https://arxiv.org/format/2405.03987">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Chemical Physics">physics.chem-ph</span> </div> </div> <p class="title is-5 mathjax"> Navigating Chemical Space with Latent Flows </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wei%2C+G">Guanghao Wei</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yining Huang</a>, <a href="/search/cs?searchtype=author&query=Duan%2C+C">Chenru Duan</a>, <a href="/search/cs?searchtype=author&query=Song%2C+Y">Yue Song</a>, <a href="/search/cs?searchtype=author&query=Du%2C+Y">Yuanqi Du</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.03987v3-abstract-short" style="display: inline;"> Recent progress of deep generative models in the vision and language domain has stimulated significant interest in more structured data generation such as molecules. However, beyond generating new random molecules, efficient exploration and a comprehensive understanding of the vast chemical space are of great importance to molecular science and applications in drug design and materials discovery.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.03987v3-abstract-full').style.display = 'inline'; document.getElementById('2405.03987v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.03987v3-abstract-full" style="display: none;"> Recent progress of deep generative models in the vision and language domain has stimulated significant interest in more structured data generation such as molecules. However, beyond generating new random molecules, efficient exploration and a comprehensive understanding of the vast chemical space are of great importance to molecular science and applications in drug design and materials discovery. In this paper, we propose a new framework, ChemFlow, to traverse chemical space through navigating the latent space learned by molecule generative models through flows. We introduce a dynamical system perspective that formulates the problem as learning a vector field that transports the mass of the molecular distribution to the region with desired molecular properties or structure diversity. Under this framework, we unify previous approaches on molecule latent space traversal and optimization and propose alternative competing methods incorporating different physical priors. We validate the efficacy of ChemFlow on molecule manipulation and single- and multi-objective molecule optimization tasks under both supervised and unsupervised molecular discovery settings. Codes and demos are publicly available on GitHub at https://github.com/garywei944/ChemFlow. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.03987v3-abstract-full').style.display = 'none'; document.getElementById('2405.03987v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted for presentation at NeurIPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.02803">arXiv:2405.02803</a> <span> [<a href="https://arxiv.org/pdf/2405.02803">pdf</a>, <a href="https://arxiv.org/format/2405.02803">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> Is Flash Attention Stable? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Golden%2C+A">Alicia Golden</a>, <a href="/search/cs?searchtype=author&query=Hsia%2C+S">Samuel Hsia</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+F">Fei Sun</a>, <a href="/search/cs?searchtype=author&query=Acun%2C+B">Bilge Acun</a>, <a href="/search/cs?searchtype=author&query=Hosmer%2C+B">Basil Hosmer</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+Y">Yejin Lee</a>, <a href="/search/cs?searchtype=author&query=DeVito%2C+Z">Zachary DeVito</a>, <a href="/search/cs?searchtype=author&query=Johnson%2C+J">Jeff Johnson</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+G">Gu-Yeon Wei</a>, <a href="/search/cs?searchtype=author&query=Brooks%2C+D">David Brooks</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+C">Carole-Jean Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.02803v1-abstract-short" style="display: inline;"> Training large-scale machine learning models poses distinct system challenges, given both the size and complexity of today's workloads. Recently, many organizations training state-of-the-art Generative AI models have reported cases of instability during training, often taking the form of loss spikes. Numeric deviation has emerged as a potential cause of this training instability, although quantify… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.02803v1-abstract-full').style.display = 'inline'; document.getElementById('2405.02803v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.02803v1-abstract-full" style="display: none;"> Training large-scale machine learning models poses distinct system challenges, given both the size and complexity of today's workloads. Recently, many organizations training state-of-the-art Generative AI models have reported cases of instability during training, often taking the form of loss spikes. Numeric deviation has emerged as a potential cause of this training instability, although quantifying this is especially challenging given the costly nature of training runs. In this work, we develop a principled approach to understanding the effects of numeric deviation, and construct proxies to put observations into context when downstream effects are difficult to quantify. As a case study, we apply this framework to analyze the widely-adopted Flash Attention optimization. We find that Flash Attention sees roughly an order of magnitude more numeric deviation as compared to Baseline Attention at BF16 when measured during an isolated forward pass. We then use a data-driven analysis based on the Wasserstein Distance to provide upper bounds on how this numeric deviation impacts model weights during training, finding that the numerical deviation present in Flash Attention is 2-5 times less significant than low-precision training. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.02803v1-abstract-full').style.display = 'none'; document.getElementById('2405.02803v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.19534">arXiv:2404.19534</a> <span> [<a href="https://arxiv.org/pdf/2404.19534">pdf</a>, <a href="https://arxiv.org/format/2404.19534">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> MIPI 2024 Challenge on Nighttime Flare Removal: Methods and Results </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dai%2C+Y">Yuekun Dai</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+D">Dafeng Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiaoming Li</a>, <a href="/search/cs?searchtype=author&query=Yue%2C+Z">Zongsheng Yue</a>, <a href="/search/cs?searchtype=author&query=Li%2C+C">Chongyi Li</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+S">Shangchen Zhou</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+R">Ruicheng Feng</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+P">Peiqing Yang</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+Z">Zhezhu Jin</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Guanqun Liu</a>, <a href="/search/cs?searchtype=author&query=Loy%2C+C+C">Chen Change Loy</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Lize Zhang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+S">Shuai Liu</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+C">Chaoyu Feng</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+L">Luyang Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+S">Shuan Chen</a>, <a href="/search/cs?searchtype=author&query=Shao%2C+G">Guangqi Shao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiaotao Wang</a>, <a href="/search/cs?searchtype=author&query=Lei%2C+L">Lei Lei</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Q">Qirui Yang</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+Q">Qihua Cheng</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Z">Zhiqiang Xu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yihao Liu</a>, <a href="/search/cs?searchtype=author&query=Yue%2C+H">Huanjing Yue</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+J">Jingyu Yang</a> , et al. (38 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.19534v2-abstract-short" style="display: inline;"> The increasing demand for computational photography and imaging on mobile platforms has led to the widespread development and integration of advanced image sensors with novel algorithms in camera systems. However, the scarcity of high-quality data for research and the rare opportunity for in-depth exchange of views from industry and academia constrain the development of mobile intelligent photogra… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.19534v2-abstract-full').style.display = 'inline'; document.getElementById('2404.19534v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.19534v2-abstract-full" style="display: none;"> The increasing demand for computational photography and imaging on mobile platforms has led to the widespread development and integration of advanced image sensors with novel algorithms in camera systems. However, the scarcity of high-quality data for research and the rare opportunity for in-depth exchange of views from industry and academia constrain the development of mobile intelligent photography and imaging (MIPI). Building on the achievements of the previous MIPI Workshops held at ECCV 2022 and CVPR 2023, we introduce our third MIPI challenge including three tracks focusing on novel image sensors and imaging algorithms. In this paper, we summarize and review the Nighttime Flare Removal track on MIPI 2024. In total, 170 participants were successfully registered, and 14 teams submitted results in the final testing phase. The developed solutions in this challenge achieved state-of-the-art performance on Nighttime Flare Removal. More details of this challenge and the link to the dataset can be found at https://mipi-challenge.org/MIPI2024/. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.19534v2-abstract-full').style.display = 'none'; document.getElementById('2404.19534v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">CVPR 2024 Mobile Intelligent Photography and Imaging (MIPI) Workshop--Nighttime Flare Removal Challenge Report. Website: https://mipi-challenge.org/MIPI2024/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.10209">arXiv:2404.10209</a> <span> [<a href="https://arxiv.org/pdf/2404.10209">pdf</a>, <a href="https://arxiv.org/format/2404.10209">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Demonstration of DB-GPT: Next Generation Data Interaction System Empowered by Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xue%2C+S">Siqiao Xue</a>, <a href="/search/cs?searchtype=author&query=Qi%2C+D">Danrui Qi</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+C">Caigao Jiang</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+W">Wenhui Shi</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+F">Fangyin Cheng</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Keting Chen</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+H">Hongjun Yang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhiping Zhang</a>, <a href="/search/cs?searchtype=author&query=He%2C+J">Jianshan He</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Hongyang Zhang</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+G">Ganglin Wei</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+W">Wang Zhao</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+F">Fan Zhou</a>, <a href="/search/cs?searchtype=author&query=Yi%2C+H">Hong Yi</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+S">Shaodong Liu</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+H">Hongjun Yang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+F">Faqiang Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.10209v3-abstract-short" style="display: inline;"> The recent breakthroughs in large language models (LLMs) are positioned to transition many areas of software. The technologies of interacting with data particularly have an important entanglement with LLMs as efficient and intuitive data interactions are paramount. In this paper, we present DB-GPT, a revolutionary and product-ready Python library that integrates LLMs into traditional data interact… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.10209v3-abstract-full').style.display = 'inline'; document.getElementById('2404.10209v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.10209v3-abstract-full" style="display: none;"> The recent breakthroughs in large language models (LLMs) are positioned to transition many areas of software. The technologies of interacting with data particularly have an important entanglement with LLMs as efficient and intuitive data interactions are paramount. In this paper, we present DB-GPT, a revolutionary and product-ready Python library that integrates LLMs into traditional data interaction tasks to enhance user experience and accessibility. DB-GPT is designed to understand data interaction tasks described by natural language and provide context-aware responses powered by LLMs, making it an indispensable tool for users ranging from novice to expert. Its system design supports deployment across local, distributed, and cloud environments. Beyond handling basic data interaction tasks like Text-to-SQL with LLMs, it can handle complex tasks like generative data analysis through a Multi-Agents framework and the Agentic Workflow Expression Language (AWEL). The Service-oriented Multi-model Management Framework (SMMF) ensures data privacy and security, enabling users to employ DB-GPT with private LLMs. Additionally, DB-GPT offers a series of product-ready features designed to enable users to integrate DB-GPT within their product environments easily. The code of DB-GPT is available at Github(https://github.com/eosphoros-ai/DB-GPT) which already has over 10.7k stars. Please install DB-GPT for your own usage with the instructions(https://github.com/eosphoros-ai/DB-GPT#install) and watch a 5-minute introduction video on Youtube(https://youtu.be/n_8RI1ENyl4) to further investigate DB-GPT. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.10209v3-abstract-full').style.display = 'none'; document.getElementById('2404.10209v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2404.08217">arXiv:2404.08217</a> <span> [<a href="https://arxiv.org/pdf/2404.08217">pdf</a>, <a href="https://arxiv.org/format/2404.08217">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Programming Languages">cs.PL</span> </div> </div> <p class="title is-5 mathjax"> Escape with Your Self: A Solution to the Avoidance Problem with Decidable Bidirectional Typing for Reachability Types </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jia%2C+S">Songlin Jia</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+G">Guannan Wei</a>, <a href="/search/cs?searchtype=author&query=He%2C+S">Siyuan He</a>, <a href="/search/cs?searchtype=author&query=Bao%2C+Y">Yuyan Bao</a>, <a href="/search/cs?searchtype=author&query=Rompf%2C+T">Tiark Rompf</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2404.08217v4-abstract-short" style="display: inline;"> Despite Rust's success in system programming, its ``shared XOR mutable'' principle significantly restricts how mutable values can be used, precluding many useful functional programming idioms. Reachability types are a recent proposal to address the key limitations of Rust-style approaches by tracking, rather than prohibiting, shared, escaping, and mutable data, even in the presence of higher-order… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.08217v4-abstract-full').style.display = 'inline'; document.getElementById('2404.08217v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2404.08217v4-abstract-full" style="display: none;"> Despite Rust's success in system programming, its ``shared XOR mutable'' principle significantly restricts how mutable values can be used, precluding many useful functional programming idioms. Reachability types are a recent proposal to address the key limitations of Rust-style approaches by tracking, rather than prohibiting, shared, escaping, and mutable data, even in the presence of higher-order functions and polymorphic types. The key to enabling tracking in the presence of avoidance is their notion of self-references. Similar to this pointers in OO languages, self-references expose the reachability of enclosing objects to internal components. While they help track escaped data, they present major challenges in designing expressive subtyping and decidable typing algorithms, as they involve subtle interactions with bounds and variance. This lack of an effective type checking algorithm is a key impediment toward making reachability types truly practical and leveraging them to bring the benefits of programming with lifetimes to practical higher-level languages. In this paper, we investigate the issues of subtyping and type checking of self-references, to fully enable this avoidance solution. We address key gaps in previous work by proposing a refined notion of subtyping, which supports encoding datatypes without resorting to term-level coercions, making the overall system more expressive. We also develop a sound and decidable bidirectional typing algorithm, formally verified in Coq. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2404.08217v4-abstract-full').style.display = 'none'; document.getElementById('2404.08217v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.13513">arXiv:2402.13513</a> <span> [<a href="https://arxiv.org/pdf/2402.13513">pdf</a>, <a href="https://arxiv.org/format/2402.13513">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> </div> </div> <p class="title is-5 mathjax"> Guac: Energy-Aware and SSA-Based Generation of Coarse-Grained Merged Accelerators from LLVM-IR </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Brumar%2C+I">Iulian Brumar</a>, <a href="/search/cs?searchtype=author&query=Rocha%2C+R">Rodrigo Rocha</a>, <a href="/search/cs?searchtype=author&query=Bernat%2C+A">Alex Bernat</a>, <a href="/search/cs?searchtype=author&query=Tripathy%2C+D">Devashree Tripathy</a>, <a href="/search/cs?searchtype=author&query=Brooks%2C+D">David Brooks</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+G">Gu-Yeon Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.13513v1-abstract-short" style="display: inline;"> Designing accelerators for resource- and power-constrained applications is a daunting task. High-level Synthesis (HLS) addresses these constraints through resource sharing, an optimization at the HLS binding stage that maps multiple operations to the same functional unit. However, resource sharing is often limited to reusing instructions within a basic block. Instead of searching globally for th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.13513v1-abstract-full').style.display = 'inline'; document.getElementById('2402.13513v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.13513v1-abstract-full" style="display: none;"> Designing accelerators for resource- and power-constrained applications is a daunting task. High-level Synthesis (HLS) addresses these constraints through resource sharing, an optimization at the HLS binding stage that maps multiple operations to the same functional unit. However, resource sharing is often limited to reusing instructions within a basic block. Instead of searching globally for the best control and dataflow graphs (CDFGs) to combine, it is constrained by existing instruction mappings and schedules. Coarse-grained function merging (CGFM) at the intermediate representation (IR) level can reuse control and dataflow patterns without dealing with the post-scheduling complexity of mapping operations onto functional units, wires, and registers. The merged functions produced by CGFM can be translated to RTL by HLS, yielding Coarse Grained Merged Accelerators (CGMAs). CGMAs are especially profitable across applications with similar data- and control-flow patterns. Prior work has used CGFM to generate CGMAs without regard for which CGFM algorithms best optimize area, power, and energy costs. We propose Guac, an energy-aware and SSA-based (static single assignment) CGMA generation methodology. Guac implements a novel ensemble of cost models for efficient CGMA generation. We also show that CGFM algorithms using SSA form to merge control- and dataflow graphs outperform prior non-SSA CGFM designs. We demonstrate significant area, power, and energy savings with respect to the state of the art. In particular, Guac more than doubles energy savings with respect to the closest related work while using a strong resource-sharing baseline. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.13513v1-abstract-full').style.display = 'none'; document.getElementById('2402.13513v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.10186">arXiv:2402.10186</a> <span> [<a href="https://arxiv.org/pdf/2402.10186">pdf</a>, <a href="https://arxiv.org/format/2402.10186">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Chemical Physics">physics.chem-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Physics">physics.comp-ph</span> </div> </div> <p class="title is-5 mathjax"> Self-consistent Validation for Machine Learning Electronic Structure </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hu%2C+G">Gengyuan Hu</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+G">Gengchen Wei</a>, <a href="/search/cs?searchtype=author&query=Lou%2C+Z">Zekun Lou</a>, <a href="/search/cs?searchtype=author&query=Torr%2C+P+H+S">Philip H. S. Torr</a>, <a href="/search/cs?searchtype=author&query=Ouyang%2C+W">Wanli Ouyang</a>, <a href="/search/cs?searchtype=author&query=Zhong%2C+H">Han-sen Zhong</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+C">Chen Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.10186v1-abstract-short" style="display: inline;"> Machine learning has emerged as a significant approach to efficiently tackle electronic structure problems. Despite its potential, there is less guarantee for the model to generalize to unseen data that hinders its application in real-world scenarios. To address this issue, a technique has been proposed to estimate the accuracy of the predictions. This method integrates machine learning with self-… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.10186v1-abstract-full').style.display = 'inline'; document.getElementById('2402.10186v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.10186v1-abstract-full" style="display: none;"> Machine learning has emerged as a significant approach to efficiently tackle electronic structure problems. Despite its potential, there is less guarantee for the model to generalize to unseen data that hinders its application in real-world scenarios. To address this issue, a technique has been proposed to estimate the accuracy of the predictions. This method integrates machine learning with self-consistent field methods to achieve both low validation cost and interpret-ability. This, in turn, enables exploration of the model's ability with active learning and instills confidence in its integration into real-world studies. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.10186v1-abstract-full').style.display = 'none'; document.getElementById('2402.10186v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.08871">arXiv:2402.08871</a> <span> [<a href="https://arxiv.org/pdf/2402.08871">pdf</a>, <a href="https://arxiv.org/format/2402.08871">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Position: Topological Deep Learning is the New Frontier for Relational Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Papamarkou%2C+T">Theodore Papamarkou</a>, <a href="/search/cs?searchtype=author&query=Birdal%2C+T">Tolga Birdal</a>, <a href="/search/cs?searchtype=author&query=Bronstein%2C+M">Michael Bronstein</a>, <a href="/search/cs?searchtype=author&query=Carlsson%2C+G">Gunnar Carlsson</a>, <a href="/search/cs?searchtype=author&query=Curry%2C+J">Justin Curry</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+Y">Yue Gao</a>, <a href="/search/cs?searchtype=author&query=Hajij%2C+M">Mustafa Hajij</a>, <a href="/search/cs?searchtype=author&query=Kwitt%2C+R">Roland Kwitt</a>, <a href="/search/cs?searchtype=author&query=Li%C3%B2%2C+P">Pietro Li貌</a>, <a href="/search/cs?searchtype=author&query=Di+Lorenzo%2C+P">Paolo Di Lorenzo</a>, <a href="/search/cs?searchtype=author&query=Maroulas%2C+V">Vasileios Maroulas</a>, <a href="/search/cs?searchtype=author&query=Miolane%2C+N">Nina Miolane</a>, <a href="/search/cs?searchtype=author&query=Nasrin%2C+F">Farzana Nasrin</a>, <a href="/search/cs?searchtype=author&query=Ramamurthy%2C+K+N">Karthikeyan Natesan Ramamurthy</a>, <a href="/search/cs?searchtype=author&query=Rieck%2C+B">Bastian Rieck</a>, <a href="/search/cs?searchtype=author&query=Scardapane%2C+S">Simone Scardapane</a>, <a href="/search/cs?searchtype=author&query=Schaub%2C+M+T">Michael T. Schaub</a>, <a href="/search/cs?searchtype=author&query=Veli%C4%8Dkovi%C4%87%2C+P">Petar Veli膷kovi膰</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+B">Bei Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yusu Wang</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a>, <a href="/search/cs?searchtype=author&query=Zamzmi%2C+G">Ghada Zamzmi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.08871v3-abstract-short" style="display: inline;"> Topological deep learning (TDL) is a rapidly evolving field that uses topological features to understand and design deep learning models. This paper posits that TDL is the new frontier for relational learning. TDL may complement graph representation learning and geometric deep learning by incorporating topological concepts, and can thus provide a natural choice for various machine learning setting… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.08871v3-abstract-full').style.display = 'inline'; document.getElementById('2402.08871v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.08871v3-abstract-full" style="display: none;"> Topological deep learning (TDL) is a rapidly evolving field that uses topological features to understand and design deep learning models. This paper posits that TDL is the new frontier for relational learning. TDL may complement graph representation learning and geometric deep learning by incorporating topological concepts, and can thus provide a natural choice for various machine learning settings. To this end, this paper discusses open problems in TDL, ranging from practical benefits to theoretical foundations. For each problem, it outlines potential solutions and future research opportunities. At the same time, this paper serves as an invitation to the scientific community to actively participate in TDL research to unlock the potential of this emerging field. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.08871v3-abstract-full').style.display = 'none'; document.getElementById('2402.08871v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Proceedings of the 41st International Conference on Machine Learning, Vienna, Austria. PMLR 235, 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.01566">arXiv:2402.01566</a> <span> [<a href="https://arxiv.org/pdf/2402.01566">pdf</a>, <a href="https://arxiv.org/format/2402.01566">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Boximator: Generating Rich and Controllable Motions for Video Synthesis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jiawei Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yuchen Zhang</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+J">Jiaxin Zou</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+Y">Yan Zeng</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+G">Guoqiang Wei</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+L">Liping Yuan</a>, <a href="/search/cs?searchtype=author&query=Li%2C+H">Hang Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.01566v1-abstract-short" style="display: inline;"> Generating rich and controllable motion is a pivotal challenge in video synthesis. We propose Boximator, a new approach for fine-grained motion control. Boximator introduces two constraint types: hard box and soft box. Users select objects in the conditional frame using hard boxes and then use either type of boxes to roughly or rigorously define the object's position, shape, or motion path in futu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.01566v1-abstract-full').style.display = 'inline'; document.getElementById('2402.01566v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.01566v1-abstract-full" style="display: none;"> Generating rich and controllable motion is a pivotal challenge in video synthesis. We propose Boximator, a new approach for fine-grained motion control. Boximator introduces two constraint types: hard box and soft box. Users select objects in the conditional frame using hard boxes and then use either type of boxes to roughly or rigorously define the object's position, shape, or motion path in future frames. Boximator functions as a plug-in for existing video diffusion models. Its training process preserves the base model's knowledge by freezing the original weights and training only the control module. To address training challenges, we introduce a novel self-tracking technique that greatly simplifies the learning of box-object correlations. Empirically, Boximator achieves state-of-the-art video quality (FVD) scores, improving on two base models, and further enhanced after incorporating box constraints. Its robust motion controllability is validated by drastic increases in the bounding box alignment metric. Human evaluation also shows that users favor Boximator generation results over the base model. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.01566v1-abstract-full').style.display = 'none'; document.getElementById('2402.01566v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, 9 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.16732">arXiv:2401.16732</a> <span> [<a href="https://arxiv.org/pdf/2401.16732">pdf</a>, <a href="https://arxiv.org/format/2401.16732">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Flash: A Hybrid Private Inference Protocol for Deep CNNs with High Accuracy and Low Latency on CPU </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Roh%2C+H">Hyeri Roh</a>, <a href="/search/cs?searchtype=author&query=Yeo%2C+J">Jinsu Yeo</a>, <a href="/search/cs?searchtype=author&query=Ko%2C+Y">Yeongil Ko</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+G">Gu-Yeon Wei</a>, <a href="/search/cs?searchtype=author&query=Brooks%2C+D">David Brooks</a>, <a href="/search/cs?searchtype=author&query=Choi%2C+W">Woo-Seok Choi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.16732v2-abstract-short" style="display: inline;"> This paper presents Flash, an optimized private inference (PI) hybrid protocol utilizing both homomorphic encryption (HE) and secure two-party computation (2PC), which can reduce the end-to-end PI latency for deep CNN models less than 1 minute with CPU. To this end, first, Flash proposes a low-latency convolution algorithm built upon a fast slot rotation operation and a novel data encoding scheme,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.16732v2-abstract-full').style.display = 'inline'; document.getElementById('2401.16732v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.16732v2-abstract-full" style="display: none;"> This paper presents Flash, an optimized private inference (PI) hybrid protocol utilizing both homomorphic encryption (HE) and secure two-party computation (2PC), which can reduce the end-to-end PI latency for deep CNN models less than 1 minute with CPU. To this end, first, Flash proposes a low-latency convolution algorithm built upon a fast slot rotation operation and a novel data encoding scheme, which results in 4-94x performance gain over the state-of-the-art. Second, to minimize the communication cost introduced by the standard nonlinear activation function ReLU, Flash replaces the entire ReLUs with the polynomial $x^2+x$ and trains deep CNN models with the new training strategy. The trained models improve the inference accuracy for CIFAR-10/100 and TinyImageNet by 16% on average (up to 40% for ResNet-32) compared to prior art. Last, Flash proposes an efficient 2PC-based $x^2+x$ evaluation protocol that does not require any offline communication and that reduces the total communication cost to process the activation layer by 84-196x over the state-of-the-art. As a result, the end-to-end PI latency of Flash implemented on CPU is 0.02 minute for CIFAR-100 and 0.57 minute for TinyImageNet classification, while the total data communication is 0.07GB for CIFAR-100 and 0.22GB for TinyImageNet. Flash improves the state-of-the-art PI by 16-45x in latency and 84-196x in communication cost. Moreover, even for ImageNet, Flash can deliver the latency less than 1 minute on CPU with the total communication less than 1GB. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.16732v2-abstract-full').style.display = 'none'; document.getElementById('2401.16732v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.17449">arXiv:2312.17449</a> <span> [<a href="https://arxiv.org/pdf/2312.17449">pdf</a>, <a href="https://arxiv.org/format/2312.17449">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> </div> </div> <p class="title is-5 mathjax"> DB-GPT: Empowering Database Interactions with Private Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xue%2C+S">Siqiao Xue</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+C">Caigao Jiang</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+W">Wenhui Shi</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+F">Fangyin Cheng</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Keting Chen</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+H">Hongjun Yang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhiping Zhang</a>, <a href="/search/cs?searchtype=author&query=He%2C+J">Jianshan He</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Hongyang Zhang</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+G">Ganglin Wei</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+W">Wang Zhao</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+F">Fan Zhou</a>, <a href="/search/cs?searchtype=author&query=Qi%2C+D">Danrui Qi</a>, <a href="/search/cs?searchtype=author&query=Yi%2C+H">Hong Yi</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+S">Shaodong Liu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+F">Faqiang Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.17449v2-abstract-short" style="display: inline;"> The recent breakthroughs in large language models (LLMs) are positioned to transition many areas of software. Database technologies particularly have an important entanglement with LLMs as efficient and intuitive database interactions are paramount. In this paper, we present DB-GPT, a revolutionary and production-ready project that integrates LLMs with traditional database systems to enhance user… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.17449v2-abstract-full').style.display = 'inline'; document.getElementById('2312.17449v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.17449v2-abstract-full" style="display: none;"> The recent breakthroughs in large language models (LLMs) are positioned to transition many areas of software. Database technologies particularly have an important entanglement with LLMs as efficient and intuitive database interactions are paramount. In this paper, we present DB-GPT, a revolutionary and production-ready project that integrates LLMs with traditional database systems to enhance user experience and accessibility. DB-GPT is designed to understand natural language queries, provide context-aware responses, and generate complex SQL queries with high accuracy, making it an indispensable tool for users ranging from novice to expert. The core innovation in DB-GPT lies in its private LLM technology, which is fine-tuned on domain-specific corpora to maintain user privacy and ensure data security while offering the benefits of state-of-the-art LLMs. We detail the architecture of DB-GPT, which includes a novel retrieval augmented generation (RAG) knowledge system, an adaptive learning mechanism to continuously improve performance based on user feedback and a service-oriented multi-model framework (SMMF) with powerful data-driven agents. Our extensive experiments and user studies confirm that DB-GPT represents a paradigm shift in database interactions, offering a more natural, efficient, and secure way to engage with data repositories. The paper concludes with a discussion of the implications of DB-GPT framework on the future of human-database interaction and outlines potential avenues for further enhancements and applications in the field. The project code is available at https://github.com/eosphoros-ai/DB-GPT. Experience DB-GPT for yourself by installing it with the instructions https://github.com/eosphoros-ai/DB-GPT#install and view a concise 10-minute video at https://www.youtube.com/watch?v=KYs4nTDzEhk. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.17449v2-abstract-full').style.display = 'none'; document.getElementById('2312.17449v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.15707">arXiv:2312.15707</a> <span> [<a href="https://arxiv.org/pdf/2312.15707">pdf</a>, <a href="https://arxiv.org/format/2312.15707">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> High-Fidelity Diffusion-based Image Editing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hou%2C+C">Chen Hou</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+G">Guoqiang Wei</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zhibo Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.15707v3-abstract-short" style="display: inline;"> Diffusion models have attained remarkable success in the domains of image generation and editing. It is widely recognized that employing larger inversion and denoising steps in diffusion model leads to improved image reconstruction quality. However, the editing performance of diffusion models tends to be no more satisfactory even with increasing denoising steps. The deficiency in editing could be… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.15707v3-abstract-full').style.display = 'inline'; document.getElementById('2312.15707v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.15707v3-abstract-full" style="display: none;"> Diffusion models have attained remarkable success in the domains of image generation and editing. It is widely recognized that employing larger inversion and denoising steps in diffusion model leads to improved image reconstruction quality. However, the editing performance of diffusion models tends to be no more satisfactory even with increasing denoising steps. The deficiency in editing could be attributed to the conditional Markovian property of the editing process, where errors accumulate throughout denoising steps. To tackle this challenge, we first propose an innovative framework where a rectifier module is incorporated to modulate diffusion model weights with residual features, thereby providing compensatory information to bridge the fidelity gap. Furthermore, we introduce a novel learning paradigm aimed at minimizing error propagation during the editing process, which trains the editing procedure in a manner similar to denoising score-matching. Extensive experiments demonstrate that our proposed framework and training strategy achieve high-fidelity reconstruction and editing results across various levels of denoising steps, meanwhile exhibits exceptional performance in terms of both quantitative metric and qualitative assessments. Moreover, we explore our model's generalization through several applications like image-to-image translation and out-of-domain image editing. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.15707v3-abstract-full').style.display = 'none'; document.getElementById('2312.15707v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by AAAI 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.15561">arXiv:2312.15561</a> <span> [<a href="https://arxiv.org/pdf/2312.15561">pdf</a>, <a href="https://arxiv.org/format/2312.15561">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> README: Bridging Medical Jargon and Lay Understanding for Patient Education through Data-Centric NLP </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yao%2C+Z">Zonghai Yao</a>, <a href="/search/cs?searchtype=author&query=Kantu%2C+N+S">Nandyala Siddharth Kantu</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+G">Guanghao Wei</a>, <a href="/search/cs?searchtype=author&query=Tran%2C+H">Hieu Tran</a>, <a href="/search/cs?searchtype=author&query=Duan%2C+Z">Zhangqi Duan</a>, <a href="/search/cs?searchtype=author&query=Kwon%2C+S">Sunjae Kwon</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Z">Zhichao Yang</a>, <a href="/search/cs?searchtype=author&query=team%2C+R+a">README annotation team</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+H">Hong Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.15561v5-abstract-short" style="display: inline;"> The advancement in healthcare has shifted focus toward patient-centric approaches, particularly in self-care and patient education, facilitated by access to Electronic Health Records (EHR). However, medical jargon in EHRs poses significant challenges in patient comprehension. To address this, we introduce a new task of automatically generating lay definitions, aiming to simplify complex medical te… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.15561v5-abstract-full').style.display = 'inline'; document.getElementById('2312.15561v5-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.15561v5-abstract-full" style="display: none;"> The advancement in healthcare has shifted focus toward patient-centric approaches, particularly in self-care and patient education, facilitated by access to Electronic Health Records (EHR). However, medical jargon in EHRs poses significant challenges in patient comprehension. To address this, we introduce a new task of automatically generating lay definitions, aiming to simplify complex medical terms into patient-friendly lay language. We first created the README dataset, an extensive collection of over 50,000 unique (medical term, lay definition) pairs and 300,000 mentions, each offering context-aware lay definitions manually annotated by domain experts. We have also engineered a data-centric Human-AI pipeline that synergizes data filtering, augmentation, and selection to improve data quality. We then used README as the training data for models and leveraged a Retrieval-Augmented Generation method to reduce hallucinations and improve the quality of model outputs. Our extensive automatic and human evaluations demonstrate that open-source mobile-friendly models, when fine-tuned with high-quality data, are capable of matching or even surpassing the performance of state-of-the-art closed-source large language models like ChatGPT. This research represents a significant stride in closing the knowledge gap in patient education and advancing patient-centric healthcare solutions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.15561v5-abstract-full').style.display = 'none'; document.getElementById('2312.15561v5-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear in Findings of the Association for Computational Linguistics: EMNLP 2024. We sincerely appreciate the tremendous efforts of the entire README annotation team throughout the expert annotation process of the README dataset</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.14385">arXiv:2312.14385</a> <span> [<a href="https://arxiv.org/pdf/2312.14385">pdf</a>, <a href="https://arxiv.org/format/2312.14385">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> </div> </div> <p class="title is-5 mathjax"> Generative AI Beyond LLMs: System Implications of Multi-Modal Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Golden%2C+A">Alicia Golden</a>, <a href="/search/cs?searchtype=author&query=Hsia%2C+S">Samuel Hsia</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+F">Fei Sun</a>, <a href="/search/cs?searchtype=author&query=Acun%2C+B">Bilge Acun</a>, <a href="/search/cs?searchtype=author&query=Hosmer%2C+B">Basil Hosmer</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+Y">Yejin Lee</a>, <a href="/search/cs?searchtype=author&query=DeVito%2C+Z">Zachary DeVito</a>, <a href="/search/cs?searchtype=author&query=Johnson%2C+J">Jeff Johnson</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+G">Gu-Yeon Wei</a>, <a href="/search/cs?searchtype=author&query=Brooks%2C+D">David Brooks</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+C">Carole-Jean Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.14385v2-abstract-short" style="display: inline;"> As the development of large-scale Generative AI models evolve beyond text (1D) generation to include image (2D) and video (3D) generation, processing spatial and temporal information presents unique challenges to quality, performance, and efficiency. We present the first work towards understanding this new system design space for multi-modal text-to-image (TTI) and text-to-video (TTV) generation m… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.14385v2-abstract-full').style.display = 'inline'; document.getElementById('2312.14385v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.14385v2-abstract-full" style="display: none;"> As the development of large-scale Generative AI models evolve beyond text (1D) generation to include image (2D) and video (3D) generation, processing spatial and temporal information presents unique challenges to quality, performance, and efficiency. We present the first work towards understanding this new system design space for multi-modal text-to-image (TTI) and text-to-video (TTV) generation models. Current model architecture designs are bifurcated into 2 categories: Diffusion- and Transformer-based models. Our systematic performance characterization on a suite of eight representative TTI/TTV models shows that after state-of-the-art optimization techniques such as Flash Attention are applied, Convolution accounts for up to 44% of execution time for Diffusion-based TTI models, while Linear layers consume up to 49% of execution time for Transformer-based models. We additionally observe that Diffusion-based TTI models resemble the Prefill stage of LLM inference, and benefit from 1.1-2.5x greater speedup from Flash Attention than Transformer-based TTI models that resemble the Decode phase. Since optimizations designed for LLMs do not map directly onto TTI/TTV models, we must conduct a thorough characterization of these workloads to gain insights for new optimization opportunities. In doing so, we define sequence length in the context of TTI/TTV models and observe sequence length can vary up to 4x in Diffusion model inference. We additionally observe temporal aspects of TTV workloads pose unique system bottlenecks, with Temporal Attention accounting for over 60% of total Attention time. Overall, our in-depth system performance characterization is a critical first step towards designing efficient and deployable systems for emerging TTI/TTV workloads. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.14385v2-abstract-full').style.display = 'none'; document.getElementById('2312.14385v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Published at 2024 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.01272">arXiv:2312.01272</a> <span> [<a href="https://arxiv.org/pdf/2312.01272">pdf</a>, <a href="https://arxiv.org/format/2312.01272">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Genomics">q-bio.GN</span> </div> </div> <p class="title is-5 mathjax"> Multiscale Topology in Interactomic Network: From Transcriptome to Antiaddiction Drug Repurposing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Du%2C+H">Hongyan Du</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+T">Tingjun Hou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.01272v1-abstract-short" style="display: inline;"> The escalating drug addiction crisis in the United States underscores the urgent need for innovative therapeutic strategies. This study embarked on an innovative and rigorous strategy to unearth potential drug repurposing candidates for opioid and cocaine addiction treatment, bridging the gap between transcriptomic data analysis and drug discovery. We initiated our approach by conducting different… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.01272v1-abstract-full').style.display = 'inline'; document.getElementById('2312.01272v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.01272v1-abstract-full" style="display: none;"> The escalating drug addiction crisis in the United States underscores the urgent need for innovative therapeutic strategies. This study embarked on an innovative and rigorous strategy to unearth potential drug repurposing candidates for opioid and cocaine addiction treatment, bridging the gap between transcriptomic data analysis and drug discovery. We initiated our approach by conducting differential gene expression analysis on addiction-related transcriptomic data to identify key genes. We propose a novel topological differentiation to identify key genes from a protein-protein interaction (PPI) network derived from DEGs. This method utilizes persistent Laplacians to accurately single out pivotal nodes within the network, conducting this analysis in a multiscale manner to ensure high reliability. Through rigorous literature validation, pathway analysis, and data-availability scrutiny, we identified three pivotal molecular targets, mTOR, mGluR5, and NMDAR, for drug repurposing from DrugBank. We crafted machine learning models employing two natural language processing (NLP)-based embeddings and a traditional 2D fingerprint, which demonstrated robust predictive ability in gauging binding affinities of DrugBank compounds to selected targets. Furthermore, we elucidated the interactions of promising drugs with the targets and evaluated their drug-likeness. This study delineates a multi-faceted and comprehensive analytical framework, amalgamating bioinformatics, topological data analysis and machine learning, for drug repurposing in addiction treatment, setting the stage for subsequent experimental validation. The versatility of the methods we developed allows for applications across a range of diseases and transcriptomic datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.01272v1-abstract-full').style.display = 'none'; document.getElementById('2312.01272v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.14062">arXiv:2311.14062</a> <span> [<a href="https://arxiv.org/pdf/2311.14062">pdf</a>, <a href="https://arxiv.org/format/2311.14062">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Hardware Resilience Properties of Text-Guided Image Classifiers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wasim%2C+S+T">Syed Talal Wasim</a>, <a href="/search/cs?searchtype=author&query=Soboka%2C+K+H">Kabila Haile Soboka</a>, <a href="/search/cs?searchtype=author&query=Mahmoud%2C+A">Abdulrahman Mahmoud</a>, <a href="/search/cs?searchtype=author&query=Khan%2C+S">Salman Khan</a>, <a href="/search/cs?searchtype=author&query=Brooks%2C+D">David Brooks</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+G">Gu-Yeon Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.14062v2-abstract-short" style="display: inline;"> This paper presents a novel method to enhance the reliability of image classification models during deployment in the face of transient hardware errors. By utilizing enriched text embeddings derived from GPT-3 with question prompts per class and CLIP pretrained text encoder, we investigate their impact as an initialization for the classification layer. Our approach achieves a remarkable… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.14062v2-abstract-full').style.display = 'inline'; document.getElementById('2311.14062v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.14062v2-abstract-full" style="display: none;"> This paper presents a novel method to enhance the reliability of image classification models during deployment in the face of transient hardware errors. By utilizing enriched text embeddings derived from GPT-3 with question prompts per class and CLIP pretrained text encoder, we investigate their impact as an initialization for the classification layer. Our approach achieves a remarkable $5.5\times$ average increase in hardware reliability (and up to $14\times$) across various architectures in the most critical layer, with minimal accuracy drop ($0.3\%$ on average) compared to baseline PyTorch models. Furthermore, our method seamlessly integrates with any image classification backbone, showcases results across various network architectures, decreases parameter and FLOPs overhead, and follows a consistent training recipe. This research offers a practical and efficient solution to bolster the robustness of image classification models against hardware failures, with potential implications for future studies in this domain. Our code and models are released at https://github.com/TalalWasim/TextGuidedResilience. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.14062v2-abstract-full').style.display = 'none'; document.getElementById('2311.14062v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at NeurIPS 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.10982">arXiv:2311.10982</a> <span> [<a href="https://arxiv.org/pdf/2311.10982">pdf</a>, <a href="https://arxiv.org/format/2311.10982">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Make Pixels Dance: High-Dynamic Video Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zeng%2C+Y">Yan Zeng</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+G">Guoqiang Wei</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+J">Jiani Zheng</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+J">Jiaxin Zou</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+Y">Yang Wei</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yuchen Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+H">Hang Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.10982v1-abstract-short" style="display: inline;"> Creating high-dynamic videos such as motion-rich actions and sophisticated visual effects poses a significant challenge in the field of artificial intelligence. Unfortunately, current state-of-the-art video generation methods, primarily focusing on text-to-video generation, tend to produce video clips with minimal motions despite maintaining high fidelity. We argue that relying solely on text inst… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.10982v1-abstract-full').style.display = 'inline'; document.getElementById('2311.10982v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.10982v1-abstract-full" style="display: none;"> Creating high-dynamic videos such as motion-rich actions and sophisticated visual effects poses a significant challenge in the field of artificial intelligence. Unfortunately, current state-of-the-art video generation methods, primarily focusing on text-to-video generation, tend to produce video clips with minimal motions despite maintaining high fidelity. We argue that relying solely on text instructions is insufficient and suboptimal for video generation. In this paper, we introduce PixelDance, a novel approach based on diffusion models that incorporates image instructions for both the first and last frames in conjunction with text instructions for video generation. Comprehensive experimental results demonstrate that PixelDance trained with public data exhibits significantly better proficiency in synthesizing videos with complex scenes and intricate motions, setting a new standard for video generation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.10982v1-abstract-full').style.display = 'none'; document.getElementById('2311.10982v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.20602">arXiv:2310.20602</a> <span> [<a href="https://arxiv.org/pdf/2310.20602">pdf</a>, <a href="https://arxiv.org/format/2310.20602">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Compliant actuators that mimic biological muscle performance with applications in a highly biomimetic robotic arm </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yang%2C+H">Haosen Yang</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+G">Guowu Wei</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+L">Lei Ren</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+L">Lingyun Yan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.20602v1-abstract-short" style="display: inline;"> This paper endeavours to bridge the existing gap in muscular actuator design for ligament-skeletal-inspired robots, thereby fostering the evolution of these robotic systems. We introduce two novel compliant actuators, namely the Internal Torsion Spring Compliant Actuator (ICA) and the External Spring Compliant Actuator (ECA), and present a comparative analysis against the previously conceived Magn… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.20602v1-abstract-full').style.display = 'inline'; document.getElementById('2310.20602v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.20602v1-abstract-full" style="display: none;"> This paper endeavours to bridge the existing gap in muscular actuator design for ligament-skeletal-inspired robots, thereby fostering the evolution of these robotic systems. We introduce two novel compliant actuators, namely the Internal Torsion Spring Compliant Actuator (ICA) and the External Spring Compliant Actuator (ECA), and present a comparative analysis against the previously conceived Magnet Integrated Soft Actuator (MISA) through computational and experimental results. These actuators, employing a motor-tendon system, emulate biological muscle-like forms, enhancing artificial muscle technology. A robotic arm application inspired by the skeletal ligament system is presented. Experiments demonstrate satisfactory power in tasks like lifting dumbbells (peak power: 36W), playing table tennis (end-effector speed: 3.2 m/s), and door opening, without compromising biomimetic aesthetics. Compared to other linear stiffness serial elastic actuators (SEAs), ECA and ICA exhibit high power-to-volume (361 x 10^3 W/m) and power-to-mass (111.6 W/kg) ratios respectively, endorsing the biomimetic design's promise in robotic development. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.20602v1-abstract-full').style.display = 'none'; document.getElementById('2310.20602v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.18765">arXiv:2310.18765</a> <span> [<a href="https://arxiv.org/pdf/2310.18765">pdf</a>, <a href="https://arxiv.org/format/2310.18765">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Rethinking Semi-Supervised Imbalanced Node Classification from Bias-Variance Decomposition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yan%2C+D">Divin Yan</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+G">Gengchen Wei</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+C">Chen Yang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Shengzhong Zhang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Z">Zengfeng Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.18765v3-abstract-short" style="display: inline;"> This paper introduces a new approach to address the issue of class imbalance in graph neural networks (GNNs) for learning on graph-structured data. Our approach integrates imbalanced node classification and Bias-Variance Decomposition, establishing a theoretical framework that closely relates data imbalance to model variance. We also leverage graph augmentation technique to estimate the variance,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.18765v3-abstract-full').style.display = 'inline'; document.getElementById('2310.18765v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.18765v3-abstract-full" style="display: none;"> This paper introduces a new approach to address the issue of class imbalance in graph neural networks (GNNs) for learning on graph-structured data. Our approach integrates imbalanced node classification and Bias-Variance Decomposition, establishing a theoretical framework that closely relates data imbalance to model variance. We also leverage graph augmentation technique to estimate the variance, and design a regularization term to alleviate the impact of imbalance. Exhaustive tests are conducted on multiple benchmarks, including naturally imbalanced datasets and public-split class-imbalanced datasets, demonstrating that our approach outperforms state-of-the-art methods in various imbalanced scenarios. This work provides a novel theoretical perspective for addressing the problem of imbalanced node classification in GNNs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.18765v3-abstract-full').style.display = 'none'; document.getElementById('2310.18765v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by NeurIPS 2023</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Thirty-seventh Conference on Neural Information Processing Systems. (NeurIPS 2023) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.18299">arXiv:2310.18299</a> <span> [<a href="https://arxiv.org/pdf/2310.18299">pdf</a>, <a href="https://arxiv.org/format/2310.18299">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Enhancing the Performance of a Biomimetic Robotic Elbow-and-Forearm System Through Bionics-Inspired Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yang%2C+H">Haosen Yang</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+G">Guowu Wei</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+L">Lei Ren</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.18299v1-abstract-short" style="display: inline;"> This paper delineates the formulation and verification of an innovative robotic forearm and elbow design, mirroring the intricate biomechanics of human skeletal and ligament systems. Conventional robotic models often undervalue the substantial function of soft tissues, leading to a compromise between compactness, safety, stability, and range of motion. In contrast, this study proposes a holistic r… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.18299v1-abstract-full').style.display = 'inline'; document.getElementById('2310.18299v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.18299v1-abstract-full" style="display: none;"> This paper delineates the formulation and verification of an innovative robotic forearm and elbow design, mirroring the intricate biomechanics of human skeletal and ligament systems. Conventional robotic models often undervalue the substantial function of soft tissues, leading to a compromise between compactness, safety, stability, and range of motion. In contrast, this study proposes a holistic replication of biological joints, encompassing bones, cartilage, ligaments, and tendons, culminating in a biomimetic robot. The research underscores the compact and stable structure of the human forearm, attributable to a tri-bone framework and diverse soft tissues. The methodology involves exhaustive examinations of human anatomy, succeeded by a theoretical exploration of the contribution of soft tissues to the stability of the prototype. The evaluation results unveil remarkable parallels between the range of motion of the robotic joints and their human counterparts. The robotic elbow emulates 98.8% of the biological elbow's range of motion, with high torque capacities of 11.25 Nm (extension) and 24 Nm (flexion). Similarly, the robotic forearm achieves 58.6% of the human forearm's rotational range, generating substantial output torques of 14 Nm (pronation) and 7.8 Nm (supination). Moreover, the prototype exhibits significant load-bearing abilities, resisting a 5kg dumbbell load without substantial displacement. It demonstrates a payload capacity exceeding 4kg and rapid action capabilities, such as lifting a 2kg dumbbell at a speed of 0.74Hz and striking a ping-pong ball at an end-effector speed of 3.2 m/s. This research underscores that a detailed anatomical study can address existing robotic design obstacles, optimize performance and anthropomorphic resemblance, and reaffirm traditional anatomical principles. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.18299v1-abstract-full').style.display = 'none'; document.getElementById('2310.18299v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.18283">arXiv:2310.18283</a> <span> [<a href="https://arxiv.org/pdf/2310.18283">pdf</a>, <a href="https://arxiv.org/format/2310.18283">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Development and Characteristics of a Highly Biomimetic Robotic Shoulder Through Bionics-Inspired Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yang%2C+H">Haosen Yang</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+G">Guowu Wei</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+L">Lei Ren</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.18283v1-abstract-short" style="display: inline;"> This paper critically analyzes conventional and biomimetic robotic arms, underscoring the trade-offs between size, motion range, and load capacity in current biomimetic models. By delving into the human shoulder's mechanical intelligence, particularly the glenohumeral joint's intricate features such as its unique ball-and-socket structure and self-locking mechanism, we pinpoint innovations that bo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.18283v1-abstract-full').style.display = 'inline'; document.getElementById('2310.18283v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.18283v1-abstract-full" style="display: none;"> This paper critically analyzes conventional and biomimetic robotic arms, underscoring the trade-offs between size, motion range, and load capacity in current biomimetic models. By delving into the human shoulder's mechanical intelligence, particularly the glenohumeral joint's intricate features such as its unique ball-and-socket structure and self-locking mechanism, we pinpoint innovations that bolster both stability and mobility while maintaining compactness. To substantiate these insights, we present a groundbreaking biomimetic robotic glenohumeral joint that authentically mirrors human musculoskeletal elements, from ligaments to tendons, integrating the biological joint's mechanical intelligence. Our exhaustive simulations and tests reveal enhanced flexibility and load capacity for the robotic joint. The advanced robotic arm demonstrates notable capabilities, including a significant range of motions and a 4 kg payload capacity, even exerting over 1.5 Nm torque. This study not only confirms the human shoulder joint's mechanical innovations but also introduces a pioneering design for a next-generation biomimetic robotic arm, setting a new benchmark in robotic technology. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.18283v1-abstract-full').style.display = 'none'; document.getElementById('2310.18283v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.15744">arXiv:2310.15744</a> <span> [<a href="https://arxiv.org/pdf/2310.15744">pdf</a>, <a href="https://arxiv.org/format/2310.15744">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Algebraic Topology">math.AT</span> </div> </div> <p class="title is-5 mathjax"> Analyzing Single Cell RNA Sequencing with Topological Nonnegative Matrix Factorization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hozumi%2C+Y">Yuta Hozumi</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.15744v1-abstract-short" style="display: inline;"> Single-cell RNA sequencing (scRNA-seq) is a relatively new technology that has stimulated enormous interest in statistics, data science, and computational biology due to the high dimensionality, complexity, and large scale associated with scRNA-seq data. Nonnegative matrix factorization (NMF) offers a unique approach due to its meta-gene interpretation of resulting low-dimensional components. Howe… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.15744v1-abstract-full').style.display = 'inline'; document.getElementById('2310.15744v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.15744v1-abstract-full" style="display: none;"> Single-cell RNA sequencing (scRNA-seq) is a relatively new technology that has stimulated enormous interest in statistics, data science, and computational biology due to the high dimensionality, complexity, and large scale associated with scRNA-seq data. Nonnegative matrix factorization (NMF) offers a unique approach due to its meta-gene interpretation of resulting low-dimensional components. However, NMF approaches suffer from the lack of multiscale analysis. This work introduces two persistent Laplacian regularized NMF methods, namely, topological NMF (TNMF) and robust topological NMF (rTNMF). By employing a total of 12 datasets, we demonstrate that the proposed TNMF and rTNMF significantly outperform all other NMF-based methods. We have also utilized TNMF and rTNMF for the visualization of popular Uniform Manifold Approximation and Projection (UMAP) and t-distributed stochastic neighbor embedding (t-SNE). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.15744v1-abstract-full').style.display = 'none'; document.getElementById('2310.15744v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.14521">arXiv:2310.14521</a> <span> [<a href="https://arxiv.org/pdf/2310.14521">pdf</a>, <a href="https://arxiv.org/format/2310.14521">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Algebraic Topology">math.AT</span> </div> </div> <p class="title is-5 mathjax"> K-Nearest-Neighbors Induced Topological PCA for scRNA Sequence Data Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cottrell%2C+S">Sean Cottrell</a>, <a href="/search/cs?searchtype=author&query=Hozumi%2C+Y">Yuta Hozumi</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.14521v1-abstract-short" style="display: inline;"> Single-cell RNA sequencing (scRNA-seq) is widely used to reveal heterogeneity in cells, which has given us insights into cell-cell communication, cell differentiation, and differential gene expression. However, analyzing scRNA-seq data is a challenge due to sparsity and the large number of genes involved. Therefore, dimensionality reduction and feature selection are important for removing spurious… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.14521v1-abstract-full').style.display = 'inline'; document.getElementById('2310.14521v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.14521v1-abstract-full" style="display: none;"> Single-cell RNA sequencing (scRNA-seq) is widely used to reveal heterogeneity in cells, which has given us insights into cell-cell communication, cell differentiation, and differential gene expression. However, analyzing scRNA-seq data is a challenge due to sparsity and the large number of genes involved. Therefore, dimensionality reduction and feature selection are important for removing spurious signals and enhancing downstream analysis. Traditional PCA, a main workhorse in dimensionality reduction, lacks the ability to capture geometrical structure information embedded in the data, and previous graph Laplacian regularizations are limited by the analysis of only a single scale. We propose a topological Principal Components Analysis (tPCA) method by the combination of persistent Laplacian (PL) technique and L$_{2,1}$ norm regularization to address multiscale and multiclass heterogeneity issues in data. We further introduce a k-Nearest-Neighbor (kNN) persistent Laplacian technique to improve the robustness of our persistent Laplacian method. The proposed kNN-PL is a new algebraic topology technique which addresses the many limitations of the traditional persistent homology. Rather than inducing filtration via the varying of a distance threshold, we introduced kNN-tPCA, where filtrations are achieved by varying the number of neighbors in a kNN network at each step, and find that this framework has significant implications for hyper-parameter tuning. We validate the efficacy of our proposed tPCA and kNN-tPCA methods on 11 diverse benchmark scRNA-seq datasets, and showcase that our methods outperform other unsupervised PCA enhancements from the literature, as well as popular Uniform Manifold Approximation (UMAP), t-Distributed Stochastic Neighbor Embedding (tSNE), and Projection Non-Negative Matrix Factorization (NMF) by significant margins. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.14521v1-abstract-full').style.display = 'none'; document.getElementById('2310.14521v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">28 pages, 11 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.09092">arXiv:2310.09092</a> <span> [<a href="https://arxiv.org/pdf/2310.09092">pdf</a>, <a href="https://arxiv.org/format/2310.09092">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> iPUNet:Iterative Cross Field Guided Point Cloud Upsampling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wei%2C+G">Guangshun Wei</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+H">Hao Pan</a>, <a href="/search/cs?searchtype=author&query=Zhuang%2C+S">Shaojie Zhuang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Y">Yuanfeng Zhou</a>, <a href="/search/cs?searchtype=author&query=Li%2C+C">Changjian Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.09092v1-abstract-short" style="display: inline;"> Point clouds acquired by 3D scanning devices are often sparse, noisy, and non-uniform, causing a loss of geometric features. To facilitate the usability of point clouds in downstream applications, given such input, we present a learning-based point upsampling method, i.e., iPUNet, which generates dense and uniform points at arbitrary ratios and better captures sharp features. To generate feature-a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.09092v1-abstract-full').style.display = 'inline'; document.getElementById('2310.09092v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.09092v1-abstract-full" style="display: none;"> Point clouds acquired by 3D scanning devices are often sparse, noisy, and non-uniform, causing a loss of geometric features. To facilitate the usability of point clouds in downstream applications, given such input, we present a learning-based point upsampling method, i.e., iPUNet, which generates dense and uniform points at arbitrary ratios and better captures sharp features. To generate feature-aware points, we introduce cross fields that are aligned to sharp geometric features by self-supervision to guide point generation. Given cross field defined frames, we enable arbitrary ratio upsampling by learning at each input point a local parameterized surface. The learned surface consumes the neighboring points and 2D tangent plane coordinates as input, and maps onto a continuous surface in 3D where arbitrary ratios of output points can be sampled. To solve the non-uniformity of input points, on top of the cross field guided upsampling, we further introduce an iterative strategy that refines the point distribution by moving sparse points onto the desired continuous 3D surface in each iteration. Within only a few iterations, the sparse points are evenly distributed and their corresponding dense samples are more uniform and better capture geometric features. Through extensive evaluations on diverse scans of objects and scenes, we demonstrate that iPUNet is robust to handle noisy and non-uniformly distributed inputs, and outperforms state-of-the-art point cloud upsampling methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.09092v1-abstract-full').style.display = 'none'; document.getElementById('2310.09092v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.07570">arXiv:2310.07570</a> <span> [<a href="https://arxiv.org/pdf/2310.07570">pdf</a>, <a href="https://arxiv.org/format/2310.07570">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Algebraic Topology">math.AT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.3934/fods.2024009">10.3934/fods.2024009 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> ChatGPT for Computational Topology </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jian Liu</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+L">Li Shen</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+G">Guo-Wei Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.07570v3-abstract-short" style="display: inline;"> ChatGPT represents a significant milestone in the field of artificial intelligence (AI), finding widespread applications across diverse domains. However, its effectiveness in mathematical contexts has been somewhat constrained by its susceptibility to conceptual errors. Concurrently, topological data analysis (TDA), a relatively new discipline, has garnered substantial interest in recent years. No… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.07570v3-abstract-full').style.display = 'inline'; document.getElementById('2310.07570v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.07570v3-abstract-full" style="display: none;"> ChatGPT represents a significant milestone in the field of artificial intelligence (AI), finding widespread applications across diverse domains. However, its effectiveness in mathematical contexts has been somewhat constrained by its susceptibility to conceptual errors. Concurrently, topological data analysis (TDA), a relatively new discipline, has garnered substantial interest in recent years. Nonetheless, the advancement of TDA is impeded by the limited understanding of computational algorithms and coding proficiency among theoreticians. This work endeavors to bridge the gap between theoretical topological concepts and their practical implementation in computational topology through the utilization of ChatGPT. We showcase how a pure theoretician, devoid of computational experience and coding skills, can effectively transform mathematical formulations and concepts into functional code for computational topology with the assistance of ChatGPT. Our strategy outlines a productive process wherein a mathematician trains ChatGPT on pure mathematical concepts, steers ChatGPT towards generating computational topology code, and subsequently validates the generated code using established examples. Our specific case studies encompass the computation of Betti numbers, Laplacian matrices, and Dirac matrices for simplicial complexes, as well as the persistence of various homologies and Laplacians. Furthermore, we explore the application of ChatGPT in computing recently developed topological theories for hypergraphs and digraphs. This work serves as an initial step towards effectively transforming pure mathematical theories into practical computational tools, with the ultimate goal of enabling real applications across diverse fields. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.07570v3-abstract-full').style.display = 'none'; document.getElementById('2310.07570v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Foundations of Data Science, 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.05171">arXiv:2310.05171</a> <span> [<a href="https://arxiv.org/pdf/2310.05171">pdf</a>, <a href="https://arxiv.org/format/2310.05171">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Multi-Ship Tracking by Robust Similarity metric </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhao%2C+H">Hongyu Zhao</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+G">Gongming Wei</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+Y">Yang Xiao</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+X">Xianglei Xing</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.05171v1-abstract-short" style="display: inline;"> Multi-ship tracking (MST) as a core technology has been proven to be applied to situational awareness at sea and the development of a navigational system for autonomous ships. Despite impressive tracking outcomes achieved by multi-object tracking (MOT) algorithms for pedestrian and vehicle datasets, these models and techniques exhibit poor performance when applied to ship datasets. Intersection of… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.05171v1-abstract-full').style.display = 'inline'; document.getElementById('2310.05171v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.05171v1-abstract-full" style="display: none;"> Multi-ship tracking (MST) as a core technology has been proven to be applied to situational awareness at sea and the development of a navigational system for autonomous ships. Despite impressive tracking outcomes achieved by multi-object tracking (MOT) algorithms for pedestrian and vehicle datasets, these models and techniques exhibit poor performance when applied to ship datasets. Intersection of Union (IoU) is the most popular metric for computing similarity used in object tracking. The low frame rates and severe image shake caused by wave turbulence in ship datasets often result in minimal, or even zero, Intersection of Union (IoU) between the predicted and detected bounding boxes. This issue contributes to frequent identity switches of tracked objects, undermining the tracking performance. In this paper, we address the weaknesses of IoU by incorporating the smallest convex shapes that enclose both the predicted and detected bounding boxes. The calculation of the tracking version of IoU (TIoU) metric considers not only the size of the overlapping area between the detection bounding box and the prediction box, but also the similarity of their shapes. Through the integration of the TIoU into state-of-the-art object tracking frameworks, such as DeepSort and ByteTrack, we consistently achieve improvements in the tracking performance of these frameworks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.05171v1-abstract-full').style.display = 'none'; document.getElementById('2310.05171v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.03103">arXiv:2310.03103</a> <span> [<a href="https://arxiv.org/pdf/2310.03103">pdf</a>, <a href="https://arxiv.org/format/2310.03103">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Learning to Prompt Your Domain for Vision-Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wei%2C+G">Guoyizhe Wei</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+F">Feng Wang</a>, <a href="/search/cs?searchtype=author&query=Shah%2C+A">Anshul Shah</a>, <a href="/search/cs?searchtype=author&query=Chellappa%2C+R">Rama Chellappa</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.03103v5-abstract-short" style="display: inline;"> Prompt learning has recently become a very efficient transfer learning paradigm for Contrastive Language Image Pretraining (CLIP) models. Compared with fine-tuning the entire encoder, prompt learning can obtain highly competitive results by optimizing only a small number of parameters, which presents considerably exciting benefits for federated learning applications that prioritizes communication… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.03103v5-abstract-full').style.display = 'inline'; document.getElementById('2310.03103v5-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.03103v5-abstract-full" style="display: none;"> Prompt learning has recently become a very efficient transfer learning paradigm for Contrastive Language Image Pretraining (CLIP) models. Compared with fine-tuning the entire encoder, prompt learning can obtain highly competitive results by optimizing only a small number of parameters, which presents considerably exciting benefits for federated learning applications that prioritizes communication efficiency. However, in this work, we identify that directly transferring prompt learning approaches into federated learning does not yield favorable results since the model often suffers from considerable domain gaps across different clients. To address this issue, we propose ADAPT, a novel domain-aware prompt learning approach that facilitates both intra- and inter-domain prompts across federated participants. The basic idea of ADAPT is that the prompted CLIP should detect the input image's domain correspondence and before making the prediction of its category. Extensive experiments of ADAPT demonstrate its significant efficiency and effectiveness in federated learning. For example, by learning and sharing only 0.08M parameters, our ADAPT attains a 68.4% average accuracy over six domains in the DomainNet dataset, which improves the original CLIP by a large margin of 14.8%. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.03103v5-abstract-full').style.display = 'none'; document.getElementById('2310.03103v5-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Wei%2C+G&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Wei%2C+G&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Wei%2C+G&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Wei%2C+G&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Wei%2C+G&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>