Search | arXiv e-print repository
Showing 1–50 of 631 results for author: Gu, S

arXiv:2411.11299 [quant-ph]
Receiver-device-independent quantum secure direct communication
Authors: Cheng Liu, Cheng Zhang, Shi-Pu Gu, Xing-Fu Wang, Lan Zhou, Yu-Bo Sheng
Abstract: Quantum secure direct communication (QSDC) enables the message sender to send secure messages directly to the receiver through the quantum channel, without keys. Device-independent (DI) and measurement-device-independent (MDI) QSDC protocols can enhance QSDC's practical security in theory. DI QSDC requires extremely high global detection efficiency and has quite a short secure communication distance. DI and MDI QSDC both require high-quality entanglement, and current entanglement sources prepare entangled photon pairs with low efficiency, which largely reduces their practical communication efficiency. In this paper, we propose a single-photon-based receiver-device-independent (RDI) QSDC protocol. It relies only on a trusted single-photon source, which is nearly on-demand with current technology, and treats all receiving devices of both communication parties as "black boxes". The parties ensure message security solely from the observed statistics. We develop a numerical method to simulate its performance in practical noisy communication situations. RDI QSDC provides the same security level as MDI QSDC. Compared with DI and MDI QSDC, RDI QSDC has some advantages. First, it uses a single-photon source and single-photon measurement, which gives it a practical communication efficiency about 3415 times that of DI QSDC and makes it easy to implement; the whole protocol is feasible with current technology. Second, it has higher photon-loss robustness and noise tolerance than DI QSDC, which enables a secure communication distance about 26 times that of DI QSDC. Based on these features, the RDI QSDC protocol makes it possible to achieve highly secure and highly efficient QSDC in the near future.
Submitted 18 November, 2024; originally announced November 2024.
arXiv:2410.18558 [cs.CL]
Infinity-MM: Scaling Multimodal Performance with Large-Scale and High-Quality Instruction Data
Authors: Shuhao Gu, Jialing Zhang, Siyuan Zhou, Kevin Yu, Zhaohu Xing, Liangdong Wang, Zhou Cao, Jintao Jia, Zhuoyi Zhang, Yixuan Wang, Zhenchong Hu, Bo-Wen Zhang, Jijie Li, Dong Liang, Yingli Zhao, Yulong Ao, Yaoqi Liu, Fangxiang Feng, Guang Liu
Abstract: Vision-Language Models (VLMs) have recently made significant progress, but the limited scale and quality of open-source instruction data hinder their performance compared to closed-source models. In this work, we address this limitation by introducing Infinity-MM, a large-scale multimodal instruction dataset with 40 million samples, enhanced through rigorous quality filtering and deduplication. We also propose a synthetic instruction generation method based on open-source VLMs, using detailed image annotations and diverse question generation. Using this data, we trained a 2-billion-parameter VLM, Aquila-VL-2B, achieving state-of-the-art (SOTA) performance for models of similar scale. This demonstrates that expanding instruction data and generating synthetic data can significantly improve the performance of open-source models.
Submitted 24 October, 2024; originally announced October 2024.

arXiv:2410.18505 [cs.CL]
CCI3.0-HQ: a large-scale Chinese dataset of high quality designed for pre-training large language models
Authors: Liangdong Wang, Bo-Wen Zhang, Chengwei Wu, Hanyu Zhao, Xiaofeng Shi, Shuhao Gu, Jijie Li, Quanyue Ma, TengFei Pan, Guang Liu
Abstract: We present CCI3.0-HQ (https://huggingface.co/datasets/BAAI/CCI3-HQ), a high-quality 500 GB subset of the Chinese Corpora Internet 3.0 (CCI3.0) (https://huggingface.co/datasets/BAAI/CCI3-Data), developed using a novel two-stage hybrid filtering pipeline that significantly enhances data quality. To evaluate its effectiveness, we trained a 0.5B-parameter model from scratch on 100B tokens across various datasets, achieving superior performance on 10 benchmarks in a zero-shot setting compared to CCI3.0, SkyPile, and WanjuanV1. The high-quality filtering process effectively distills the capabilities of the Qwen2-72B-instruct model into a compact 0.5B model, attaining optimal F1 scores for Chinese web data classification. We believe this open-access dataset will facilitate broader access to high-quality language models.
Submitted 25 October, 2024; v1 submitted 24 October, 2024; originally announced October 2024.

arXiv:2410.17497 [astro-ph.SR]
Further study of starspot activity and measurement of differential rotation for SZ Piscium
Authors: Yue Xiang, Shenghong Gu, A. Collier Cameron, J. R. Barnes, Dongtao Cao
Abstract: We present a series of 9 Doppler images of the magnetically active K component of the RS CVn-type binary SZ Psc, based on high-resolution spectroscopic data collected from 2014 to 2018. We apply least-squares deconvolution to all spectra to extract average profiles with high signal-to-noise ratios (SNRs) for Doppler imaging. The surface maps of the K subgiant show starspots widely distributed in latitude and longitude. A prominent, non-axisymmetric polar spot around phase 0 is revealed by all images with sufficient phase coverage, which may be a stable feature on the K component. The starspots evolve on a time scale of one month. We have determined the surface shear rate of the K component from starspot maps reconstructed 10 days apart in 2017 Nov–Dec, through the cross-correlation method. The surface differential rotation parameters are $\Omega_{\rm eq} = 1.591 \pm 0.002$ rad d$^{-1}$ and $\Delta\Omega = 0.035 \pm 0.003$ rad d$^{-1}$. Absorption lines from the tertiary component are detected in all LSD profiles of SZ Psc, and we measure the radial velocities of the binary system and the tertiary component to derive an elliptical orbit with a period of $1530 \pm 3$ days and a mass of $0.75 \pm 0.06$ M$_\odot$ for the tertiary component.
Submitted 22 October, 2024; originally announced October 2024.
Comments: 15 pages, 8 figures, accepted for publication in ApJ
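For orientation: cross-correlation shear measurements of this kind conventionally fit a solar-like differential rotation law. Assuming that standard form (an assumption on our part; the abstract quotes only the fitted parameters), the reported values imply a relative shear of about 2% and an equator-to-pole lap time of roughly half a year:

```latex
% Solar-like law with latitude \theta (assumed, not stated in the abstract):
\Omega(\theta) = \Omega_{\rm eq} - \Delta\Omega \sin^2\theta,
\qquad
\alpha = \frac{\Delta\Omega}{\Omega_{\rm eq}} = \frac{0.035}{1.591} \approx 0.022,
\qquad
\tau_{\rm lap} = \frac{2\pi}{\Delta\Omega} \approx 180~\mathrm{d}.
```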
arXiv:2410.13271 [cs.CV, cs.LG]
Inductive Gradient Adjustment For Spectral Bias In Implicit Neural Representations
Authors: Kexuan Shi, Hai Chen, Leheng Zhang, Shuhang Gu
Abstract: Implicit Neural Representations (INRs), as a versatile representation paradigm, have achieved success in various computer vision tasks. Due to the spectral bias of vanilla multi-layer perceptrons (MLPs), existing methods focus on designing MLPs with sophisticated architectures or repurposing training techniques for highly accurate INRs. In this paper, we delve into the linear dynamics model of MLPs and theoretically identify the empirical Neural Tangent Kernel (eNTK) matrix as a reliable link between spectral bias and training dynamics. Based on the eNTK matrix, we propose a practical inductive gradient adjustment method that can purposefully improve the spectral bias via inductive generalization of an eNTK-based gradient transformation matrix. We evaluate our method on different INR tasks with various INR architectures and compare it to existing training techniques. The superior representation performance clearly validates the advantage of our proposed method. Armed with our gradient adjustment method, better INRs with more enhanced texture details and sharpened edges can be learned from data through tailored improvements on spectral bias.
Submitted 17 October, 2024; originally announced October 2024.
Comments: 28 pages, 12 figures
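For readers unfamiliar with the eNTK matrix this abstract builds on: for a scalar-valued network $f_\theta$, the empirical NTK over a batch of inputs is the Gram matrix of per-sample parameter gradients, $K_{ij} = \nabla_\theta f(x_i) \cdot \nabla_\theta f(x_j)$. A minimal PyTorch sketch under that standard definition (the tiny MLP and batch size are illustrative placeholders, not the paper's setup):

```python
import torch
import torch.nn as nn

# Placeholder coordinate-MLP of the kind INR papers study (2-D input, scalar output).
model = nn.Sequential(nn.Linear(2, 64), nn.ReLU(), nn.Linear(64, 1))
params = [p for p in model.parameters() if p.requires_grad]

def per_sample_grad(x):
    """Flattened gradient of the scalar output w.r.t. all parameters."""
    out = model(x.unsqueeze(0)).squeeze()
    grads = torch.autograd.grad(out, params)
    return torch.cat([g.reshape(-1) for g in grads])

xs = torch.rand(8, 2)                              # 8 sample coordinates
J = torch.stack([per_sample_grad(x) for x in xs])  # Jacobian, shape (8, n_params)
entk = J @ J.T                                     # empirical NTK Gram matrix, (8, 8)
```

The paper's contribution, per the abstract, is a gradient transformation built from this matrix; the sketch only shows what the matrix itself is.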
arXiv:2410.10429 [cs.CV]
DOME: Taming Diffusion Model into High-Fidelity Controllable Occupancy World Model
Authors: Songen Gu, Wei Yin, Bu Jin, Xiaoyang Guo, Junming Wang, Haodong Li, Qian Zhang, Xiaoxiao Long
Abstract: We propose DOME, a diffusion-based world model that predicts future occupancy frames from past occupancy observations. The ability of this world model to capture the evolution of the environment is crucial for planning in autonomous driving. Compared to 2D video-based world models, the occupancy world model uses a native 3D representation, which features easily obtainable annotations and is modality-agnostic. This flexibility has the potential to facilitate the development of more advanced world models. Existing occupancy world models either suffer from detail loss due to discrete tokenization or rely on simplistic diffusion architectures, leading to inefficiencies and difficulties in predicting future occupancy with controllability. Our DOME exhibits two key features: (1) High-fidelity and long-duration generation. We adopt a spatial-temporal diffusion transformer to predict future occupancy frames based on historical context. This architecture efficiently captures spatial-temporal information, enabling high-fidelity details and the ability to generate predictions over long durations. (2) Fine-grained controllability. We address the challenge of controllability in predictions by introducing a trajectory resampling method, which significantly enhances the model's ability to generate controlled predictions. Extensive experiments on the widely used nuScenes dataset demonstrate that our method surpasses existing baselines in both qualitative and quantitative evaluations, establishing new state-of-the-art performance on nuScenes. Specifically, our approach surpasses the baseline by 10.5% in mIoU and 21.2% in IoU for occupancy reconstruction, and by 36.0% in mIoU and 24.6% in IoU for 4D occupancy forecasting.
Submitted 14 October, 2024; originally announced October 2024.
Comments: Please visit our project page at https://gusongen.github.io/DOME

arXiv:2410.05051 [cs.CV, cs.RO]
HE-Drive: Human-Like End-to-End Driving with Vision Language Models
Authors: Junming Wang, Xingyu Zhang, Zebin Xing, Songen Gu, Xiaoyang Guo, Yang Hu, Ziying Song, Qian Zhang, Xiaoxiao Long, Wei Yin
Abstract: In this paper, we propose HE-Drive: the first human-like-centric end-to-end autonomous driving system that generates trajectories which are both temporally consistent and comfortable. Recent studies have shown that imitation learning-based planners and learning-based trajectory scorers can effectively generate and select accurate trajectories that closely mimic expert demonstrations. However, such trajectory planners and scorers face the dilemma of generating temporally inconsistent and uncomfortable trajectories. To solve these problems, our HE-Drive first extracts key 3D spatial representations through sparse perception, which then serve as conditional inputs for a Conditional Denoising Diffusion Probabilistic Models (DDPMs)-based motion planner to generate temporally consistent multi-modal trajectories. A Vision-Language Models (VLMs)-guided trajectory scorer subsequently selects the most comfortable trajectory from these candidates to control the vehicle, ensuring human-like end-to-end driving. Experiments show that HE-Drive not only achieves state-of-the-art performance (i.e., reduces the average collision rate by 71% compared to VAD) and efficiency (i.e., 1.9X faster than SparseDrive) on the challenging nuScenes and OpenScene datasets, but also provides the most comfortable driving experience on real-world data. For more information, visit the project website: https://jmwang0117.github.io/HE-Drive/.
Submitted 7 October, 2024; originally announced October 2024.
arXiv:2410.04847 [eess.IV, cs.CV]
Causal Context Adjustment Loss for Learned Image Compression
Authors: Minghao Han, Shiyin Jiang, Shengxi Li, Xin Deng, Mai Xu, Ce Zhu, Shuhang Gu
Abstract: In recent years, learned image compression (LIC) technologies have surpassed conventional methods notably in terms of rate-distortion (RD) performance. Most current learned techniques are VAE-based with an autoregressive entropy model, which clearly improves RD performance by utilizing the decoded causal context. However, existing methods are highly dependent on a fixed, hand-crafted causal context, and how to guide the auto-encoder to generate a causal context that better serves the autoregressive entropy model is worth exploring. In this paper, we make the first attempt at explicitly adjusting the causal context with our proposed Causal Context Adjustment loss (CCA-loss). By imposing the CCA-loss, we enable the neural network to spontaneously move important information into the early stages of the autoregressive entropy model. Furthermore, as transformer technology has developed remarkably, its variants have been adopted by many state-of-the-art (SOTA) LIC techniques; yet existing computing devices do not handle the attention mechanism's computation well, which imposes a burden on computation and inference latency. To overcome this, we establish a convolutional neural network (CNN) image compression model and adopt an unevenly grouped channel-wise strategy for high efficiency. Ultimately, the proposed CNN-based LIC network trained with our Causal Context Adjustment loss attains a strong trade-off between inference latency and rate-distortion performance.
Submitted 7 October, 2024; originally announced October 2024.
Comments: Accepted to NeurIPS 2024

arXiv:2410.04335 [cs.CL]
ReTok: Replacing Tokenizer to Enhance Representation Efficiency in Large Language Model
Authors: Shuhao Gu, Mengdi Zhao, Bowen Zhang, Liangdong Wang, Jijie Li, Guang Liu
Abstract: The tokenizer is an essential component of large language models (LLMs), and a tokenizer with a high compression rate can improve the model's representation and processing efficiency. However, the tokenizer cannot ensure a high compression rate in all scenarios, and an increase in the average input and output lengths increases the training and inference costs of the model. Therefore, it is crucial to find ways to improve the model's efficiency with minimal cost while maintaining performance. In this work, we propose a method to improve model representation and processing efficiency by replacing the tokenizers of LLMs. We propose replacing and reinitializing the parameters of the model's input and output layers with the parameters of the original model, and training only these parameters while keeping the others fixed. We conducted experiments on different LLMs, and the results show that our method can maintain model performance after replacing the tokenizer, while significantly improving decoding speed for long texts.
Submitted 5 October, 2024; originally announced October 2024.
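The training recipe this abstract describes — swap the tokenizer, reinitialize the input and output embeddings, and train only those while everything else stays frozen — is straightforward to express. A minimal PyTorch/transformers sketch under those assumptions (the checkpoint name and vocabulary size are hypothetical, and this is a simplification of the paper's method, not its code):

```python
from transformers import AutoModelForCausalLM

# Hypothetical base checkpoint whose tokenizer we want to replace.
model = AutoModelForCausalLM.from_pretrained("some/base-model")
new_vocab_size = 80_000  # vocabulary size of the replacement tokenizer (assumed)

# Resize the input/output embeddings to the new vocabulary; freshly added rows
# are reinitialized while all other weights keep their original values.
model.resize_token_embeddings(new_vocab_size)

# Freeze everything except the input embeddings and the output head, then
# train only those parameters (assumes the model exposes an output head;
# with tied weights the two sets collapse to the same tensor).
trainable = {id(p) for p in model.get_input_embeddings().parameters()}
trainable |= {id(p) for p in model.get_output_embeddings().parameters()}
for p in model.parameters():
    p.requires_grad = id(p) in trainable
```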
arXiv:2410.04003 [quant-ph]
Device-independent quantum secret sharing with advanced random key generation basis
Authors: Qi Zhang, Jia-Wei Ying, Zhong-Jian Wang, Wei Zhong, Ming-Ming Du, Shu-Ting Shen, Xi-Yun Li, An-Lei Zhang, Shi-Pu Gu, Xing-Fu Wang, Lan Zhou, Yu-Bo Sheng
Abstract: Quantum secret sharing (QSS) enables a dealer to securely distribute keys to multiple players. Device-independent (DI) QSS can resist all possible attacks from practical imperfect devices and provides QSS with the highest level of security in theory. However, DI QSS requires high-performance devices, especially low-noise channels, which is a big challenge for its experimental demonstration. We propose a DI QSS protocol with an advanced random key generation basis strategy, which combines the random key generation basis with noise preprocessing and postselection strategies. We develop methods to simplify Eve's conditional entropy bound and numerically simulate the key generation rate in an acceptable time. Our DI QSS protocol has some advantages. First, it increases the noise tolerance threshold from the initial 7.147% to 9.231% (29.16% growth) and reduces the global detection efficiency threshold from 96.32% to 93.41%. The maximal distance between any two users increases to 1.43 km, about 5.5 times the initial value. Second, by randomly selecting two basis combinations to generate the key, our DI QSS protocol can reduce entanglement resource consumption. Our protocol has potential for DI QSS's experimental demonstration and application in the future.
Submitted 4 October, 2024; originally announced October 2024.
Comments: 16 pages, 6 figures, 1 table
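The quoted "29.16% growth" is the relative increase of the noise tolerance threshold, which checks out:

```latex
\frac{9.231 - 7.147}{7.147} \approx 0.2916 \;\Rightarrow\; 29.16\%~\text{growth}.
```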
Taylor</a>, <a href="/search/?searchtype=author&query=Osgood%2C+G">Greg Osgood</a>, <a href="/search/?searchtype=author&query=Unberath%2C+M">Mathias Unberath</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.01143v1-abstract-short" style="display: inline;"> In percutaneous pelvic trauma surgery, accurate placement of Kirschner wires (K-wires) is crucial to ensure effective fracture fixation and avoid complications due to breaching the cortical bone along an unsuitable trajectory. Surgical navigation via mixed reality (MR) can help achieve precise wire placement in a low-profile form factor. Current approaches in this domain are as yet unsuitable for… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.01143v1-abstract-full').style.display = 'inline'; document.getElementById('2410.01143v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.01143v1-abstract-full" style="display: none;"> In percutaneous pelvic trauma surgery, accurate placement of Kirschner wires (K-wires) is crucial to ensure effective fracture fixation and avoid complications due to breaching the cortical bone along an unsuitable trajectory. Surgical navigation via mixed reality (MR) can help achieve precise wire placement in a low-profile form factor. Current approaches in this domain are as yet unsuitable for real-world deployment because they fall short of guaranteeing accurate visual feedback due to uncontrolled bending of the wire. To ensure accurate feedback, we introduce StraightTrack, an MR navigation system designed for percutaneous wire placement in complex anatomy. StraightTrack features a marker body equipped with a rigid access cannula that mitigates wire bending due to interactions with soft tissue and a covered bony surface. Integrated with an Optical See-Through Head-Mounted Display (OST HMD) capable of tracking the cannula body, StraightTrack offers real-time 3D visualization and guidance without external trackers, which are prone to losing line-of-sight. In phantom experiments with two experienced orthopedic surgeons, StraightTrack improves wire placement accuracy, achieving the ideal trajectory within $5.26 \pm 2.29$ mm and $2.88 \pm 1.49$ degree, compared to over 12.08 mm and 4.07 degree for comparable methods. As MR navigation systems continue to mature, StraightTrack realizes their potential for internal fracture fixation and other percutaneous orthopedic procedures. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.01143v1-abstract-full').style.display = 'none'; document.getElementById('2410.01143v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.00386">arXiv:2410.00386</a> <span> [<a href="https://arxiv.org/pdf/2410.00386">pdf</a>, <a href="https://arxiv.org/format/2410.00386">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div>
<p class="title is-5 mathjax"> Seamless Augmented Reality Integration in Arthroscopy: A Pipeline for Articular Reconstruction and Guidance </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Shu%2C+H">Hongchao Shu</a>, <a href="/search/?searchtype=author&query=Liu%2C+M">Mingxu Liu</a>, <a href="/search/?searchtype=author&query=Seenivasan%2C+L">Lalithkumar Seenivasan</a>, <a href="/search/?searchtype=author&query=Gu%2C+S">Suxi Gu</a>, <a href="/search/?searchtype=author&query=Ku%2C+P">Ping-Cheng Ku</a>, <a href="/search/?searchtype=author&query=Knopf%2C+J">Jonathan Knopf</a>, <a href="/search/?searchtype=author&query=Taylor%2C+R">Russell Taylor</a>, <a href="/search/?searchtype=author&query=Unberath%2C+M">Mathias Unberath</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Arthroscopy is a minimally invasive surgical procedure used to diagnose and treat joint problems. The clinical workflow of arthroscopy typically involves inserting an arthroscope into the joint through a small incision, during which surgeons navigate and operate largely by relying on their visual assessment through the arthroscope. However, the arthroscope's restricted field of view and lack of depth perception pose challenges in navigating complex articular structures and achieving surgical precision during procedures. Aiming at enhancing intraoperative awareness, we present a robust pipeline that incorporates simultaneous localization and mapping, depth estimation, and 3D Gaussian splatting to realistically reconstruct intra-articular structures solely based on monocular arthroscope video. Extending 3D reconstruction to Augmented Reality (AR) applications, our solution offers AR assistance for articular notch measurement and annotation anchoring in a human-in-the-loop manner. Compared to traditional Structure-from-Motion and Neural Radiance Field-based methods, our pipeline achieves dense 3D reconstruction and competitive rendering fidelity with explicit 3D representation in 7 minutes on average. When evaluated on four phantom datasets, our method achieves RMSE = 2.21 mm reconstruction error, PSNR = 32.86 and SSIM = 0.89 on average. Because our pipeline enables AR reconstruction and guidance directly from monocular arthroscopy without any additional data and/or hardware, our solution may hold the potential for enhancing intraoperative awareness and facilitating surgical precision in arthroscopy. Our AR measurement tool achieves accuracy within $1.59 \pm 1.81$ mm and the AR annotation tool achieves an mIoU of 0.721. </span> </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p>
<p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, with 2 additional pages as the supplementary. Accepted by AE-CAI 2024</span> </p>
<p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> F.2.2; I.2.7 </p> </li>
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.17772">arXiv:2409.17772</a> <span> [<a href="https://arxiv.org/pdf/2409.17772">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Fluid Dynamics">physics.flu-dyn</span> </div> </div>
<p class="title is-5 mathjax"> An Experimental Configuration to Study High-Enthalpy Radiating Flows Under Nonequilibrium De-excitation </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Liu%2C+Z">Zhuo Liu</a>, <a href="/search/?searchtype=author&query=Gu%2C+S">Sangdi Gu</a>, <a href="/search/?searchtype=author&query=Chen%2C+T">Tiantian Chen</a>, <a href="/search/?searchtype=author&query=Hao%2C+J">Jiaao Hao</a>, <a href="/search/?searchtype=author&query=Wen%2C+C">Chih-yung Wen</a>, <a href="/search/?searchtype=author&query=Wang%2C+Q">Qiu Wang</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> This paper presents an experimental configuration to study high-enthalpy radiating flows under nonequilibrium de-excitation. A general design method is introduced, combining theoretical analysis and numerical simulations to tailor the flow conditions for various research objectives. The implementation involves considerations of the shock tube condition, the arrangement configuration, and the effective measurement zone. The interplay between the shock tube condition and aerofoil geometry generates diverse de-excitation patterns. The shock tube test time, transition onset location, and radiance intensity determine the effective measurement zone. An example utilizing N2 as the test gas demonstrates the method, achieving one-dimensional flow with thermal nonequilibrium and chemical freezing along the centerline, validating the method's effectiveness. An effective measurement zone of 200 mm is obtained under this condition, and the primary constraint under high-enthalpy conditions is the limited shock tube test time due to the high shock velocity and low fill pressure. </span> </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li>
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.17561">arXiv:2409.17561</a> <span> [<a href="https://arxiv.org/pdf/2409.17561">pdf</a>, <a href="https://arxiv.org/format/2409.17561">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div>
<p class="title is-5 mathjax"> TestBench: Evaluating Class-Level Test Case Generation Capability of Large Language Models </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Zhang%2C+Q">Quanjun Zhang</a>, <a href="/search/?searchtype=author&query=Shang%2C+Y">Ye Shang</a>, <a href="/search/?searchtype=author&query=Fang%2C+C">Chunrong Fang</a>, <a href="/search/?searchtype=author&query=Gu%2C+S">Siqi Gu</a>, <a href="/search/?searchtype=author&query=Zhou%2C+J">Jianyi Zhou</a>, <a href="/search/?searchtype=author&query=Chen%2C+Z">Zhenyu Chen</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Software testing is a crucial phase in the software life cycle, helping identify potential risks and reduce maintenance costs. With the advancement of Large Language Models (LLMs), researchers have proposed an increasing number of LLM-based software testing techniques, particularly in the area of test case generation. Despite the growing interest, limited efforts have been made to thoroughly evaluate the actual capabilities of LLMs in this task. In this paper, we introduce TestBench, a benchmark for class-level LLM-based test case generation. We construct a dataset of 108 Java programs from 9 real-world, large-scale projects on GitHub, each representing a different thematic domain. We then design three distinct types of prompts based on context descriptions, including self-contained context, full context, and simple context. In addition, we propose a fine-grained evaluation framework that considers five aspects of test cases: syntactic correctness, compilation correctness, test correctness, code coverage rate, and defect detection rate. Furthermore, we propose a heuristic algorithm to repair erroneous test cases generated by LLMs. We evaluate CodeLlama-13b, GPT-3.5, and GPT-4 on TestBench, and our experimental results indicate that larger models demonstrate a greater ability to effectively utilize contextual information, thus generating higher-quality test cases. Smaller models may struggle with the noise introduced by the extensive information contained within the full context. However, when using the simplified version, namely the simple context, which is derived from the full context via abstract syntax tree analysis, the performance of these models improves significantly. Our analysis highlights the current progress and pinpoints future directions to further enhance the effectiveness of models by handling contextual information for test case generation. </span> </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li>
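<p class="is-size-7">TestBench's five evaluation aspects form a natural cascade, since a test that does not parse cannot compile, and one that does not compile cannot run or cover code. Below is a minimal sketch of such a cascaded scorer, with the language-specific checkers passed in as callables; all names here are hypothetical stand-ins, not TestBench's API.</p>
<pre><code class="language-python">
from dataclasses import dataclass
from typing import Callable

@dataclass
class TestScore:
    syntactic: bool      # does the generated file parse?
    compiles: bool       # does it compile against the focal class?
    passes: bool         # do the generated tests pass?
    coverage: float      # coverage rate on the focal class
    defects_found: int   # seeded defects detected

def evaluate(test_src: str,
             parses: Callable[[str], bool],
             compiles: Callable[[str], bool],
             runs: Callable[[str], bool],
             coverage_of: Callable[[str], float],
             defects_in: Callable[[str], int]) -> TestScore:
    # Cascade: a test failing an earlier stage scores zero on later ones.
    ok_syntax = parses(test_src)
    ok_compile = ok_syntax and compiles(test_src)
    ok_pass = ok_compile and runs(test_src)
    return TestScore(
        ok_syntax,
        ok_compile,
        ok_pass,
        coverage_of(test_src) if ok_pass else 0.0,
        defects_in(test_src) if ok_pass else 0,
    )
</code></pre>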
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.16947">arXiv:2409.16947</a> <span> [<a href="https://arxiv.org/pdf/2409.16947">pdf</a>, <a href="https://arxiv.org/format/2409.16947">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div>
<p class="title is-5 mathjax"> NTIRE 2024 Challenge on Stereo Image Super-Resolution: Methods and Results </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Wang%2C+L">Longguang Wang</a>, <a href="/search/?searchtype=author&query=Guo%2C+Y">Yulan Guo</a>, <a href="/search/?searchtype=author&query=Li%2C+J">Juncheng Li</a>, <a href="/search/?searchtype=author&query=Liu%2C+H">Hongda Liu</a>, <a href="/search/?searchtype=author&query=Zhao%2C+Y">Yang Zhao</a>, <a href="/search/?searchtype=author&query=Wang%2C+Y">Yingqian Wang</a>, <a href="/search/?searchtype=author&query=Jin%2C+Z">Zhi Jin</a>, <a href="/search/?searchtype=author&query=Gu%2C+S">Shuhang Gu</a>, <a href="/search/?searchtype=author&query=Timofte%2C+R">Radu Timofte</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> This paper summarizes the 3rd NTIRE challenge on stereo image super-resolution (SR) with a focus on new solutions and results. The task of this challenge is to super-resolve a low-resolution stereo image pair to a high-resolution one with a magnification factor of x4 under a limited computational budget. Compared with single image SR, the major difficulty of this challenge lies in how to exploit additional information in another viewpoint and how to maintain stereo consistency in the results. This challenge has two tracks: one on bicubic degradation and one on real degradations. In total, 108 and 70 participants successfully registered for the two tracks, respectively. In the test phase, 14 and 13 teams successfully submitted valid results with PSNR (RGB) scores better than the baseline. This challenge establishes a new benchmark for stereo image SR. </span> </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li>
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.06691">arXiv:2409.06691</a> <span> [<a href="https://arxiv.org/pdf/2409.06691">pdf</a>, <a href="https://arxiv.org/format/2409.06691">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div>
<p class="title is-5 mathjax"> Geometric-Averaged Preference Optimization for Soft Preference Labels </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Furuta%2C+H">Hiroki Furuta</a>, <a href="/search/?searchtype=author&query=Lee%2C+K">Kuang-Huei Lee</a>, <a href="/search/?searchtype=author&query=Gu%2C+S+S">Shixiang Shane Gu</a>, <a href="/search/?searchtype=author&query=Matsuo%2C+Y">Yutaka Matsuo</a>, <a href="/search/?searchtype=author&query=Faust%2C+A">Aleksandra Faust</a>, <a href="/search/?searchtype=author&query=Zen%2C+H">Heiga Zen</a>, <a href="/search/?searchtype=author&query=Gur%2C+I">Izzeddin Gur</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Many algorithms for aligning LLMs with human preferences assume that human preferences are binary and deterministic. However, human preferences can vary across individuals, and therefore should be represented distributionally. In this work, we introduce distributional soft preference labels and improve Direct Preference Optimization (DPO) with a weighted geometric average of the LLM output likelihood in the loss function. This approach adjusts the scale of the learning loss based on the soft labels such that the loss approaches zero when the responses are close to equally preferred. This simple modification can be easily applied to any DPO-based method and mitigates over-optimization and objective mismatch, from which prior works suffer. Our experiments simulate the soft preference labels with AI feedback from LLMs and demonstrate that geometric averaging consistently improves performance on standard benchmarks for alignment research. In particular, we observe more preferable responses than with binary labels, and significant improvements in settings where modestly-confident labels are in the majority. </span> </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p>
<p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at NeurIPS 2024</span> </p> </li>
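<p class="is-size-7">To see where a soft label can enter the DPO objective, note that a log-space geometric average of the two likelihood ratios with weights w and 1-w collapses to scaling the usual preference margin by 2w-1. The sketch below implements that reading; it is an illustration consistent with the abstract, not necessarily the paper's exact loss.</p>
<pre><code class="language-python">
import math

def soft_dpo_loss(logp_y1, logp_y2, ref_logp_y1, ref_logp_y2, w, beta=0.1):
    """Illustrative DPO-style loss with a soft preference label w in [0, 1]
    (w = 1: y1 clearly preferred; w = 0.5: equally preferred).
    Weighting the margin by (2w - 1) equals the log-space geometric average
    of the two likelihood ratios with weights w and 1 - w, so the gradient
    fades to zero as w approaches 0.5."""
    margin = (logp_y1 - ref_logp_y1) - (logp_y2 - ref_logp_y2)
    weighted = (2.0 * w - 1.0) * margin
    return -math.log(1.0 / (1.0 + math.exp(-beta * weighted)))

# Equally preferred responses contribute a constant loss with zero gradient:
print(soft_dpo_loss(-1.0, -3.0, -2.0, -2.0, w=0.5))  # log 2, independent of the margin
</code></pre>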
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.13517">arXiv:2408.13517</a> <span> [<a href="https://arxiv.org/pdf/2408.13517">pdf</a>, <a href="https://arxiv.org/format/2408.13517">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div>
<p class="title is-5 mathjax"> Scalable Similarity-Aware Test Suite Minimization with Reinforcement Learning </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Gu%2C+S">Sijia Gu</a>, <a href="/search/?searchtype=author&query=Mesbah%2C+A">Ali Mesbah</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> The Multi-Criteria Test Suite Minimization (MCTSM) problem aims to refine test suites by removing redundant test cases, guided by adequacy criteria such as code coverage or fault detection capability. However, current techniques either exhibit a high loss of fault detection ability or face scalability challenges due to the NP-hard nature of the problem, which limits their practical utility. We propose TripRL, a novel technique that integrates traditional criteria such as statement coverage and fault detection ability with test coverage similarity into an Integer Linear Program (ILP), to produce a diverse reduced test suite with high test effectiveness. TripRL leverages a bipartite graph representation and its embedding for a concise ILP formulation, and combines the ILP with effective reinforcement learning (RL) training. This combination renders large-scale test suite minimization more scalable and enhances test effectiveness. Our empirical evaluations demonstrate that TripRL's runtime scales linearly with the magnitude of the MCTSM problem. Notably, for large test suites where existing approaches fail to provide solutions within a reasonable time frame, our technique consistently delivers solutions in less than 47 minutes. The reduced test suites produced by TripRL also maintain the original statement coverage and fault detection ability while having a higher potential to detect unknown faults. </span> </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li>
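<p class="is-size-7">Stripped of the similarity term and the RL guidance, the core of MCTSM is a set-cover style ILP: keep the fewest tests while every statement stays covered and every known fault stays detected. A toy version of that baseline program, using the open-source pulp solver; the data and variable names are illustrative, not TripRL's actual formulation.</p>
<pre><code class="language-python">
import pulp

# Toy instance: which statements (s*) and faults (f*) each test covers.
covers = {"t1": {"s1", "s2"}, "t2": {"s2", "s3"}, "t3": {"s1", "s3", "f1"}}
requirements = {"s1", "s2", "s3", "f1"}  # everything must stay covered

prob = pulp.LpProblem("test_suite_minimization", pulp.LpMinimize)
keep = {t: pulp.LpVariable(f"keep_{t}", cat="Binary") for t in covers}

# Objective: keep as few tests as possible.
prob += pulp.lpSum(keep.values())

# Constraint: each requirement remains covered by at least one kept test.
for r in requirements:
    prob += pulp.lpSum(keep[t] for t in covers if r in covers[t]) >= 1

prob.solve(pulp.PULP_CBC_CMD(msg=False))
print(sorted(t for t in covers if keep[t].value() == 1))  # one minimal suite, e.g. ['t1', 't3']
</code></pre>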
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.12621">arXiv:2408.12621</a> <span> [<a href="https://arxiv.org/pdf/2408.12621">pdf</a>, <a href="https://arxiv.org/format/2408.12621">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Chemical Physics">physics.chem-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div>
<p class="title is-5 mathjax"> StringNET: Neural Network based Variational Method for Transition Pathways </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Han%2C+J">Jiayue Han</a>, <a href="/search/?searchtype=author&query=Gu%2C+S">Shuting Gu</a>, <a href="/search/?searchtype=author&query=Zhou%2C+X">Xiang Zhou</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Rare transition events in meta-stable systems under noisy fluctuations are crucial for many non-equilibrium physical and chemical processes. In these processes, the primary contributions to reactive flux are predominantly near the transition pathways that connect two meta-stable states. Efficient computation of these paths is essential in computational chemistry. In this work, we examine the temperature-dependent maximum flux path, the minimum energy path, and the minimum action path at zero temperature. We propose the StringNET method for training these paths using variational formulations and deep learning techniques. Unlike traditional chain-of-state methods, StringNET directly parametrizes the paths through neural network functions, utilizing the arc-length parameter as the main input. The tasks of gradient descent and re-parametrization in the string method are unified into a single framework using loss functions to train deep neural networks. More importantly, the loss function for the maximum flux path is interpreted as a softmax approximation to the numerically challenging minimax problem of the minimum energy path. To compute the minimum energy path efficiently and robustly, we develop a pre-training strategy that includes the maximum flux path loss in the early training stage, significantly accelerating the computation of minimum energy and action paths. We demonstrate the superior performance of this method through various analytical and chemical examples, as well as the two- and four-dimensional Ginzburg-Landau functional energy. </span> </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li>
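<p class="is-size-7">The contrast with chain-of-state methods can be made concrete: instead of optimizing a discrete chain of configurations, a network maps the arc-length parameter s in [0, 1] to a point on the path, with the two meta-stable states pinned as boundary conditions. The ansatz below is one common way to hard-wire those endpoints; it is an illustrative choice, not necessarily the paper's.</p>
<pre><code class="language-python">
import torch

class PathNet(torch.nn.Module):
    """Path x(s) from state a (s=0) to state b (s=1), parametrized so the
    endpoints are satisfied exactly; the network only shapes the interior."""
    def __init__(self, a, b, hidden=64):
        super().__init__()
        self.a = torch.as_tensor(a, dtype=torch.float32)
        self.b = torch.as_tensor(b, dtype=torch.float32)
        self.net = torch.nn.Sequential(
            torch.nn.Linear(1, hidden), torch.nn.Tanh(),
            torch.nn.Linear(hidden, self.a.numel()),
        )
    def forward(self, s):  # s: (n, 1) arc-length samples in [0, 1]
        straight = (1 - s) * self.a + s * self.b
        return straight + s * (1 - s) * self.net(s)  # correction vanishes at both ends

# Usage idea: sample s, evaluate a path loss (e.g. an integrated energy or
# action along x(s)), and backpropagate into the network weights.
path = PathNet(a=[-1.0, 0.0], b=[1.0, 0.0])
print(path(torch.rand(128, 1)).shape)  # 128 points along the current path
</code></pre>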
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.12534">arXiv:2408.12534</a> <span> [<a href="https://arxiv.org/pdf/2408.12534">pdf</a>, <a href="https://arxiv.org/format/2408.12534">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div>
<p class="title is-5 mathjax"> Automatic Organ and Pan-cancer Segmentation in Abdomen CT: the FLARE 2023 Challenge </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Ma%2C+J">Jun Ma</a>, <a href="/search/?searchtype=author&query=Zhang%2C+Y">Yao Zhang</a>, <a href="/search/?searchtype=author&query=Gu%2C+S">Song Gu</a>, <a href="/search/?searchtype=author&query=Ge%2C+C">Cheng Ge</a>, <a href="/search/?searchtype=author&query=Wang%2C+E">Ershuai Wang</a>, <a href="/search/?searchtype=author&query=Zhou%2C+Q">Qin Zhou</a>, <a href="/search/?searchtype=author&query=Huang%2C+Z">Ziyan Huang</a>, <a href="/search/?searchtype=author&query=Lyu%2C+P">Pengju Lyu</a>, <a href="/search/?searchtype=author&query=He%2C+J">Jian He</a>, <a href="/search/?searchtype=author&query=Wang%2C+B">Bo Wang</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Organ and cancer segmentation in abdominal Computed Tomography (CT) scans is the prerequisite for precise cancer diagnosis and treatment. Most existing benchmarks and algorithms are tailored to specific cancer types, limiting their ability to provide comprehensive cancer analysis. This work presents the first international competition on abdominal organ and pan-cancer segmentation by providing a large-scale and diverse dataset, including 4650 CT scans with various cancer types from over 40 medical centers. The winning team established a new state-of-the-art with a deep learning-based cascaded framework, achieving average Dice Similarity Coefficient scores of 92.3% for organs and 64.9% for lesions on the hidden multi-national testing set. The dataset and code of top teams are publicly available, offering a benchmark platform to drive further innovations: https://codalab.lisn.upsaclay.fr/competitions/12239. </span> </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p>
<p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">MICCAI 2024 FLARE Challenge Summary</span> </p> </li>
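<p class="is-size-7">For reference, the Dice Similarity Coefficient used for ranking measures the overlap between a predicted mask P and the ground truth T as 2|P intersect T| / (|P| + |T|); a minimal binary-mask implementation:</p>
<pre><code class="language-python">
import numpy as np

def dice(pred, truth):
    """Dice Similarity Coefficient for binary masks, in [0, 1]."""
    pred, truth = np.asarray(pred, bool), np.asarray(truth, bool)
    denom = pred.sum() + truth.sum()
    if denom == 0:
        return 1.0  # convention: two empty masks agree perfectly
    return 2.0 * np.logical_and(pred, truth).sum() / denom

# Two overlapping 1-D "segmentations": 2*2 / (3+3) = 0.667
print(round(dice([1, 1, 1, 0, 0], [0, 1, 1, 1, 0]), 3))
</code></pre>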
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.09675">arXiv:2408.09675</a> <span> [<a href="https://arxiv.org/pdf/2408.09675">pdf</a>, <a href="https://arxiv.org/format/2408.09675">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div>
<p class="title is-5 mathjax"> Multi-Agent Reinforcement Learning for Autonomous Driving: A Survey </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Zhang%2C+R">Ruiqi Zhang</a>, <a href="/search/?searchtype=author&query=Hou%2C+J">Jing Hou</a>, <a href="/search/?searchtype=author&query=Walter%2C+F">Florian Walter</a>, <a href="/search/?searchtype=author&query=Gu%2C+S">Shangding Gu</a>, <a href="/search/?searchtype=author&query=Guan%2C+J">Jiayi Guan</a>, <a href="/search/?searchtype=author&query=R%C3%B6hrbein%2C+F">Florian Röhrbein</a>, <a href="/search/?searchtype=author&query=Du%2C+Y">Yali Du</a>, <a href="/search/?searchtype=author&query=Cai%2C+P">Panpan Cai</a>, <a href="/search/?searchtype=author&query=Chen%2C+G">Guang Chen</a>, <a href="/search/?searchtype=author&query=Knoll%2C+A">Alois Knoll</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Reinforcement Learning (RL) is a potent tool for sequential decision-making and has achieved performance surpassing human capabilities across many challenging real-world tasks. As the extension of RL to the multi-agent system domain, multi-agent RL (MARL) not only needs to learn the control policy but also must account for interactions with all other agents in the environment, mutual influences among different system components, and the distribution of computational resources. This augments the complexity of algorithmic design and poses higher requirements on computational resources. At the same time, simulators are crucial for obtaining realistic data, which is the foundation of RL. In this paper, we first propose a series of metrics for simulators and summarize the features of existing benchmarks. Second, to ease comprehension, we recall the foundational knowledge and then synthesize recent advances in MARL for autonomous driving and intelligent transportation systems. Specifically, we examine their environmental modeling, state representation, perception units, and algorithm design. Finally, we discuss open challenges as well as prospects and opportunities. We hope this paper can help researchers integrate MARL technologies and spark more insightful ideas toward intelligent and autonomous driving. </span> </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p>
<p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">23 pages, 6 figures and 2 tables. Submitted to IEEE Journal</span> </p> </li>
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.07410">arXiv:2408.07410</a> <span> [<a href="https://arxiv.org/pdf/2408.07410">pdf</a>, <a href="https://arxiv.org/format/2408.07410">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div>
<p class="title is-5 mathjax"> Aquila2 Technical Report </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Zhang%2C+B">Bo-Wen Zhang</a>, <a href="/search/?searchtype=author&query=Wang%2C+L">Liangdong Wang</a>, <a href="/search/?searchtype=author&query=Li%2C+J">Jijie Li</a>, <a href="/search/?searchtype=author&query=Gu%2C+S">Shuhao Gu</a>, <a href="/search/?searchtype=author&query=Wu%2C+X">Xinya Wu</a>, <a href="/search/?searchtype=author&query=Zhang%2C+Z">Zhengduo Zhang</a>, <a href="/search/?searchtype=author&query=Gao%2C+B">Boyan Gao</a>, <a href="/search/?searchtype=author&query=Ao%2C+Y">Yulong Ao</a>, <a href="/search/?searchtype=author&query=Liu%2C+G">Guang Liu</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> This paper introduces the Aquila2 series, which comprises a wide range of bilingual models with parameter sizes of 7, 34, and 70 billion. These models are trained based on an innovative framework named HeuriMentor (HM), which offers real-time insights into model convergence and enhances the training process and data management. The HM System, comprising the Adaptive Training Engine (ATE), Training State Monitor (TSM), and Data Management Unit (DMU), allows for precise monitoring of the model's training progress and enables efficient optimization of data distribution, thereby enhancing training effectiveness. Extensive evaluations show that the Aquila2 model series performs comparably well on both English and Chinese benchmarks. Specifically, Aquila2-34B demonstrates only a slight decrease in performance when quantized to Int4. Furthermore, we have made our training code (https://github.com/FlagOpen/FlagScale) and model weights (https://github.com/FlagAI-Open/Aquila2) publicly available to support ongoing research and the development of applications. </span> </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li>
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.06567">arXiv:2408.06567</a> <span> [<a href="https://arxiv.org/pdf/2408.06567">pdf</a>, <a href="https://arxiv.org/format/2408.06567">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div>
<p class="title is-5 mathjax"> AquilaMoE: Efficient Training for MoE Models with Scale-Up and Scale-Out Strategies </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Zhang%2C+B">Bo-Wen Zhang</a>, <a href="/search/?searchtype=author&query=Wang%2C+L">Liangdong Wang</a>, <a href="/search/?searchtype=author&query=Yuan%2C+Y">Ye Yuan</a>, <a href="/search/?searchtype=author&query=Li%2C+J">Jijie Li</a>, <a href="/search/?searchtype=author&query=Gu%2C+S">Shuhao Gu</a>, <a href="/search/?searchtype=author&query=Zhao%2C+M">Mengdi Zhao</a>, <a href="/search/?searchtype=author&query=Wu%2C+X">Xinya Wu</a>, <a href="/search/?searchtype=author&query=Liu%2C+G">Guang Liu</a>, <a href="/search/?searchtype=author&query=Wu%2C+C">Chengwei Wu</a>, <a href="/search/?searchtype=author&query=Zhao%2C+H">Hanyu Zhao</a>, <a href="/search/?searchtype=author&query=Du%2C+L">Li Du</a>, <a href="/search/?searchtype=author&query=Ju%2C+Y">Yiming Ju</a>, <a href="/search/?searchtype=author&query=Ma%2C+Q">Quanyue Ma</a>, <a href="/search/?searchtype=author&query=Ao%2C+Y">Yulong Ao</a>, <a href="/search/?searchtype=author&query=Zhao%2C+Y">Yingli Zhao</a>, <a href="/search/?searchtype=author&query=Zhu%2C+S">Songhe Zhu</a>, <a href="/search/?searchtype=author&query=Cao%2C+Z">Zhou Cao</a>, <a href="/search/?searchtype=author&query=Liang%2C+D">Dong Liang</a>, <a href="/search/?searchtype=author&query=Lin%2C+Y">Yonghua Lin</a>, <a href="/search/?searchtype=author&query=Zhang%2C+M">Ming Zhang</a>, <a href="/search/?searchtype=author&query=Wang%2C+S">Shunfei Wang</a>, <a href="/search/?searchtype=author&query=Zhou%2C+Y">Yanxin Zhou</a>, <a href="/search/?searchtype=author&query=Ye%2C+M">Min Ye</a>, <a href="/search/?searchtype=author&query=Chen%2C+X">Xuekai Chen</a>, <a href="/search/?searchtype=author&query=Yu%2C+X">Xinyang Yu</a> , et al. (2 additional authors not shown) </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> In recent years, with the rapid application of large language models across various fields, the scale of these models has gradually increased, and the resources required for their pre-training have grown exponentially. Training an LLM from scratch costs substantial computational resources, whereas scaling up from a smaller model is a more efficient approach and has thus attracted significant attention. In this paper, we present AquilaMoE, a cutting-edge bilingual 8*16B Mixture of Experts (MoE) language model that has 8 experts with 16 billion parameters each and is developed using an innovative training methodology called EfficientScale. This approach optimizes performance while minimizing data requirements through a two-stage process. The first stage, termed Scale-Up, initializes the larger model with weights from a pre-trained smaller model, enabling substantial knowledge transfer and continuous pretraining with significantly less data. The second stage, Scale-Out, uses a pre-trained dense model to initialize the MoE experts, further enhancing knowledge transfer and performance. Extensive validation experiments on 1.8B and 7B models compared various initialization schemes, identifying those that maintain and further reduce loss during continuous pretraining. Utilizing the optimal scheme, we successfully trained a 16B model and subsequently the 8*16B AquilaMoE model, demonstrating significant improvements in performance and training efficiency. </span> </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li>
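<p class="is-size-7">The Scale-Out stage echoes the broader "upcycling" recipe for MoE initialization: each expert starts as a copy of the pre-trained dense feed-forward block, so routing begins among functionally identical experts that then specialize during continued pretraining. A minimal PyTorch sketch of that idea; it is illustrative, not AquilaMoE's actual code.</p>
<pre><code class="language-python">
import copy
import torch

def scale_out(dense_ffn, num_experts=8):
    """Initialize MoE experts by cloning a pre-trained dense FFN block.
    Assumes the first layer of the block exposes the model width."""
    experts = torch.nn.ModuleList(
        copy.deepcopy(dense_ffn) for _ in range(num_experts)
    )
    # The router starts fresh; experts differentiate as training continues.
    router = torch.nn.Linear(dense_ffn[0].in_features, num_experts)
    return experts, router

dense = torch.nn.Sequential(
    torch.nn.Linear(512, 2048), torch.nn.GELU(), torch.nn.Linear(2048, 512)
)
experts, router = scale_out(dense)
print(len(experts), router)  # 8 identical experts plus a new gating layer
</code></pre>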
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.03272">arXiv:2408.03272</a> <span> [<a href="https://arxiv.org/pdf/2408.03272">pdf</a>, <a href="https://arxiv.org/format/2408.03272">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Plasma Physics">physics.plasm-ph</span> </div> </div>
<p class="title is-5 mathjax"> Suppression of Edge Localized Modes in ITER Baseline Scenario in EAST using Edge Localized Magnetic Perturbations </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Xie%2C+P">P. Xie</a>, <a href="/search/?searchtype=author&query=Sun%2C+Y">Y. Sun</a>, <a href="/search/?searchtype=author&query=Jia%2C+M">M. Jia</a>, <a href="/search/?searchtype=author&query=Loarte%2C+A">A. Loarte</a>, <a href="/search/?searchtype=author&query=Liu%2C+Y+Q">Y. Q. Liu</a>, <a href="/search/?searchtype=author&query=Ye%2C+C">C. Ye</a>, <a href="/search/?searchtype=author&query=Gu%2C+S">S. Gu</a>, <a href="/search/?searchtype=author&query=Sheng%2C+H">H. Sheng</a>, <a href="/search/?searchtype=author&query=Liang%2C+Y">Y. Liang</a>, <a href="/search/?searchtype=author&query=Ma%2C+Q">Q. Ma</a>, <a href="/search/?searchtype=author&query=Yang%2C+H">H. Yang</a>, <a href="/search/?searchtype=author&query=Paz-Soldan%2C+C+A">C. A. Paz-Soldan</a>, <a href="/search/?searchtype=author&query=Deng%2C+G">G. Deng</a>, <a href="/search/?searchtype=author&query=Fu%2C+S">S. Fu</a>, <a href="/search/?searchtype=author&query=Chen%2C+G">G. Chen</a>, <a href="/search/?searchtype=author&query=He%2C+K">K. He</a>, <a href="/search/?searchtype=author&query=Jia%2C+T">T. Jia</a>, <a href="/search/?searchtype=author&query=Lu%2C+D">D. Lu</a>, <a href="/search/?searchtype=author&query=Lv%2C+B">B. Lv</a>, <a href="/search/?searchtype=author&query=Qian%2C+J">J. Qian</a>, <a href="/search/?searchtype=author&query=Wang%2C+H+H">H. H. Wang</a>, <a href="/search/?searchtype=author&query=Wang%2C+S">S. Wang</a>, <a href="/search/?searchtype=author&query=Weisberg%2C+D">D. Weisberg</a>, <a href="/search/?searchtype=author&query=Wu%2C+X">X. Wu</a>, <a href="/search/?searchtype=author&query=Xu%2C+W">W. Xu</a> , et al. (9 additional authors not shown) </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> We report the suppression of Type-I Edge Localized Modes (ELMs) in the EAST tokamak under ITER baseline conditions using $n = 4$ Resonant Magnetic Perturbations (RMPs), while maintaining energy confinement. Achieving RMP-ELM suppression requires a normalized plasma beta ($\beta_N$) exceeding 1.8 in a target plasma with $q_{95}\approx 3.1$ and tungsten divertors. Quasi-linear modeling shows high plasma beta enhances RMP-driven neoclassical toroidal viscosity torque, reducing field penetration thresholds. These findings demonstrate the feasibility and efficiency of high $n$ RMPs for ELM suppression in ITER. </span> </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p>
<p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 4 figures</span> </p> </li>
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.03095">arXiv:2408.03095</a> <span> [<a href="https://arxiv.org/pdf/2408.03095">pdf</a>, <a href="https://arxiv.org/format/2408.03095">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div>
<p class="title is-5 mathjax"> Improving LLM-based Unit Test Generation via Template-based Repair </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Gu%2C+S">Siqi Gu</a>, <a href="/search/?searchtype=author&query=Fang%2C+C">Chunrong Fang</a>, <a href="/search/?searchtype=author&query=Zhang%2C+Q">Quanjun Zhang</a>, <a href="/search/?searchtype=author&query=Tian%2C+F">Fangyuan Tian</a>, <a href="/search/?searchtype=author&query=Zhou%2C+J">Jianyi Zhou</a>, <a href="/search/?searchtype=author&query=Chen%2C+Z">Zhenyu Chen</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Unit testing is crucial for detecting bugs in individual program units but consumes time and effort. The existing automated unit test generation methods are mainly based on search-based software testing (SBST) and language models to liberate developers. Recently, large language models (LLMs) have demonstrated remarkable reasoning and generation capabilities.
However, several problems limit their ability to generate high-quality test cases: (1) LLMs may generate invalid test cases under insufficient context, resulting in compilation errors; (2) a lack of test and coverage feedback information may cause runtime errors and low coverage rates; and (3) the repetitive suppression problem causes LLMs to get stuck in a repetition loop of self-repair or re-generation attempts. In this paper, we propose TestART, a novel unit test generation method that leverages the strengths of LLMs while overcoming the limitations mentioned above. TestART improves LLM-based unit test generation via the co-evolution of automated generation and repair iteration. TestART leverages the template-based repair technique to fix bugs in LLM-generated test cases, using prompt injection to guide the next-step automated generation and avoid repetition suppression. Furthermore, TestART extracts coverage information from the passed test cases and utilizes it as testing feedback to enhance the sufficiency of the final test case. This synergy between generation and repair elevates the quality, effectiveness, and readability of the produced test cases significantly beyond previous methods. In comparative experiments, the pass rate of TestART-generated test cases is 78.55%, which is approximately 18% higher than both the ChatGPT-4.0 model and the same ChatGPT-3.5-based method ChatUniTest. It also achieves an impressive line coverage rate of 90.96% on the focal methods that passed the test, exceeding EvoSuite by 3.4%. </span> </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li>
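<p class="is-size-7">The co-evolution described above amounts to a control loop: generate, apply template-based repair, execute, and inject the resulting coverage feedback into the next prompt. One way to picture that flow is sketched below, with every helper passed in as a callable since they are hypothetical stand-ins rather than TestART's API.</p>
<pre><code class="language-python">
def generate_with_repair(llm, repair, run_and_measure, focal_method,
                         rounds=4, target_coverage=0.9):
    """Sketch of a generate/repair co-evolution loop: repair broken tests
    with templates, then feed coverage back into the next prompt."""
    prompt = "Write unit tests for:\n" + focal_method
    best = None
    for _ in range(rounds):
        candidate = repair(llm(prompt))            # template-based repair pass
        passed, coverage = run_and_measure(candidate)  # compile + execute
        if passed and (best is None or coverage > best[1]):
            best = (candidate, coverage)
        if passed and coverage >= target_coverage:
            break
        # Prompt injection: carry execution feedback into the next round.
        prompt += "\n# Last attempt covered {:.0%}; raise coverage.".format(coverage)
    return best
</code></pre>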
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.01394">arXiv:2408.01394</a> <span> [<a href="https://arxiv.org/pdf/2408.01394">pdf</a>, <a href="https://arxiv.org/format/2408.01394">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div>
<p class="title is-5 mathjax"> Improving Multilingual Neural Machine Translation by Utilizing Semantic and Linguistic Features </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Bu%2C+M">Mengyu Bu</a>, <a href="/search/?searchtype=author&query=Gu%2C+S">Shuhao Gu</a>, <a href="/search/?searchtype=author&query=Feng%2C+Y">Yang Feng</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Many-to-many multilingual neural machine translation can be regarded as the process of integrating semantic features from the source sentences and linguistic features from the target sentences. To enhance zero-shot translation, models need to share knowledge across languages, which can be achieved through auxiliary tasks for learning a universal representation or cross-lingual mapping. To this end, we propose to exploit both semantic and linguistic features between multiple languages to enhance multilingual translation. On the encoder side, we introduce a disentangling learning task that aligns encoder representations by disentangling semantic and linguistic features, thus facilitating knowledge transfer while preserving complete information. On the decoder side, we leverage a linguistic encoder to integrate low-level linguistic features to assist in target language generation. Experimental results on multilingual datasets demonstrate significant improvement in zero-shot translation compared to the baseline system, while maintaining performance in supervised translation. Further analysis validates the effectiveness of our method in leveraging both semantic and linguistic features. The code is available at https://github.com/ictnlp/SemLing-MNMT. </span> </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p>
<p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ACL2024 Findings</span> </p> </li>
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.00829">arXiv:2408.00829</a> <span> [<a href="https://arxiv.org/pdf/2408.00829">pdf</a>, <a href="https://arxiv.org/format/2408.00829">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> </div> </div>
<p class="title is-5 mathjax"> Optimizing quantum error correction protocols with erasure qubits </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Gu%2C+S">Shouzhen Gu</a>, <a href="/search/?searchtype=author&query=Vaknin%2C+Y">Yotam Vaknin</a>, <a href="/search/?searchtype=author&query=Retzker%2C+A">Alex Retzker</a>, <a href="/search/?searchtype=author&query=Kubica%2C+A">Aleksander Kubica</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax">
Erasure qubits offer a promising avenue toward reducing the overhead of quantum error correction (QEC) protocols. However, they require additional operations, such as erasure checks, that may add extra noise and increase the runtime of QEC protocols. To assess the benefits provided by erasure qubits, we focus on the performance of the surface code as a quantum memory. In particular, we analyze various erasure check schedules, find the correctable regions in the phase space of error parameters, and probe the subthreshold scaling of the logical error rate. We then consider a realization of erasure qubits in superconducting hardware architectures via dual-rail qubits. We use the standard transmon-based implementation of the surface code as the performance benchmark. Our results indicate that QEC protocols with erasure qubits can outperform the ones with state-of-the-art transmons, even in the absence of precise information about the locations of erasure errors. </span> </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p>
<p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12+6 pages, 14 figures</span> </p> </li>
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12+6 pages, 14 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.00545">arXiv:2408.00545</a> <span> [<a href="https://arxiv.org/pdf/2408.00545">pdf</a>, <a href="https://arxiv.org/format/2408.00545">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Collecting Large-Scale Robotic Datasets on a High-Speed Mobile Platform </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Lin%2C+Y">Yuxin Lin</a>, <a href="/search/?searchtype=author&query=Ma%2C+J">Jiaxuan Ma</a>, <a href="/search/?searchtype=author&query=Gu%2C+S">Sizhe Gu</a>, <a href="/search/?searchtype=author&query=Kong%2C+J">Jipeng Kong</a>, <a href="/search/?searchtype=author&query=Xu%2C+B">Bowen Xu</a>, <a href="/search/?searchtype=author&query=Zhao%2C+X">Xiting Zhao</a>, <a href="/search/?searchtype=author&query=Zhao%2C+D">Dengji Zhao</a>, <a href="/search/?searchtype=author&query=Cao%2C+W">Wenhan Cao</a>, <a href="/search/?searchtype=author&query=Schwertfeger%2C+S">Sören Schwertfeger</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.00545v1-abstract-short" style="display: inline;"> Mobile robotics datasets are essential for robotics research, for example for research on Simultaneous Localization and Mapping (SLAM). Therefore, the ShanghaiTech Mapping Robot was constructed; it features a multitude of high-performance sensors and a 16-node cluster to collect all this data. That robot is based on a Clearpath Husky mobile base with a maximum speed of 1 meter per second. This is… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.00545v1-abstract-full').style.display = 'inline'; document.getElementById('2408.00545v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.00545v1-abstract-full" style="display: none;"> Mobile robotics datasets are essential for robotics research, for example for research on Simultaneous Localization and Mapping (SLAM). Therefore, the ShanghaiTech Mapping Robot was constructed; it features a multitude of high-performance sensors and a 16-node cluster to collect all this data. That robot is based on a Clearpath Husky mobile base with a maximum speed of 1 meter per second. This is fine for indoor datasets, but to collect large-scale outdoor datasets, a faster platform is needed. This system paper introduces our high-speed mobile platform for data collection. The mapping robot is secured on a rear-steered flatbed car with a maximal field of view. Additionally, two encoders collect odometry data from two of the car wheels, and an external sensor plate houses a down-looking RGB camera and an event camera. With this setup, a dataset of more than 10 km covering an underground parking garage and the outside of our campus was collected and is published with this paper.
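<p class="is-size-7">For readers unfamiliar with wheel-encoder odometry of the kind mentioned above, a generic planar integration looks roughly like this (a differential approximation with invented constants, not this platform's calibration):</p> <pre><code>
import math

# Toy planar odometry from two wheel encoders. All constants are made up.
TICKS_PER_REV = 4096
WHEEL_RADIUS = 0.30          # metres
TRACK_WIDTH = 1.60           # metres between the two encoder wheels

def integrate(tick_pairs, x=0.0, y=0.0, th=0.0):
    """tick_pairs: iterable of (left, right) encoder tick increments."""
    per_tick = 2.0 * math.pi * WHEEL_RADIUS / TICKS_PER_REV
    for dl, dr in tick_pairs:
        sl, sr = dl * per_tick, dr * per_tick    # wheel arc lengths
        ds = 0.5 * (sl + sr)                     # distance travelled
        dth = (sr - sl) / TRACK_WIDTH            # heading change
        x += ds * math.cos(th + 0.5 * dth)       # midpoint-rule update
        y += ds * math.sin(th + 0.5 * dth)
        th += dth
    return x, y, th

# 100 intervals with the right wheel slightly slower: a gentle right arc.
print(integrate([(210, 190)] * 100))
</code></pre>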
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.00545v1-abstract-full').style.display = 'none'; document.getElementById('2408.00545v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.18290">arXiv:2407.18290</a> <span> [<a href="https://arxiv.org/pdf/2407.18290">pdf</a>, <a href="https://arxiv.org/format/2407.18290">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Several questions of visual generation in 2024 </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Gu%2C+S">Shuyang Gu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.18290v1-abstract-short" style="display: inline;"> This paper does not propose any new algorithms but instead outlines various problems in the field of visual generation based on the author's personal understanding. The core of these problems lies in how to decompose visual signals, with all other issues being closely related to this central problem and stemming from unsuitable approaches to signal decomposition. This paper aims to draw researcher… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.18290v1-abstract-full').style.display = 'inline'; document.getElementById('2407.18290v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.18290v1-abstract-full" style="display: none;"> This paper does not propose any new algorithms but instead outlines various problems in the field of visual generation based on the author's personal understanding. The core of these problems lies in how to decompose visual signals, with all other issues being closely related to this central problem and stemming from unsuitable approaches to signal decomposition. This paper aims to draw researchers' attention to the significance of Visual Signal Decomposition. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.18290v1-abstract-full').style.display = 'none'; document.getElementById('2407.18290v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 3 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.10702">arXiv:2407.10702</a> <span> [<a href="https://arxiv.org/pdf/2407.10702">pdf</a>, <a href="https://arxiv.org/ps/2407.10702">ps</a>, <a href="https://arxiv.org/format/2407.10702">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Geometric Analysis of Unconstrained Feature Models with $d=K$ </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Shen%2C+Y">Yi Shen</a>, <a href="/search/?searchtype=author&query=Gu%2C+S">Shao Gu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.10702v2-abstract-short" style="display: inline;"> Recently, interesting empirical phenomena known as Neural Collapse have been observed during the final phase of training deep neural networks for classification tasks. We examine this issue when the feature dimension d is equal to the number of classes K. We demonstrate that two popular unconstrained feature models are strict saddle functions, with every critical point being either a global minimu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.10702v2-abstract-full').style.display = 'inline'; document.getElementById('2407.10702v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.10702v2-abstract-full" style="display: none;"> Recently, interesting empirical phenomena known as Neural Collapse have been observed during the final phase of training deep neural networks for classification tasks. We examine this issue when the feature dimension d is equal to the number of classes K. We demonstrate that two popular unconstrained feature models are strict saddle functions, with every critical point being either a global minimum or a strict saddle point that can be escaped along directions of negative curvature. These findings conclusively confirm the conjecture on unconstrained feature models made in previous articles. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.10702v2-abstract-full').style.display = 'none'; document.getElementById('2407.10702v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024.
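<p class="is-size-7">The object of study can be made concrete with a small numerical check. The sketch below uses one common regularized unconstrained feature model (cross-entropy plus L2 on features and classifier, one free feature per class) and verifies that a scaled simplex equiangular tight frame, the configuration Neural Collapse predicts, beats random configurations; the scale is hand-picked, not optimized:</p> <pre><code>
import numpy as np

rng = np.random.default_rng(0)
K, lam = 4, 5e-3              # classes (= feature dim d) and weight decay

def ufm_loss(W, H):
    """Regularized unconstrained feature model: column c of H is the free
    feature of class c, and W is the linear classifier."""
    logits = W @ H
    logits = logits - logits.max(axis=0)          # stabilize the softmax
    p = np.exp(logits) / np.exp(logits).sum(axis=0)
    ce = -np.log(np.diag(p)).mean()               # correct class = column index
    return ce + 0.5 * lam * ((W**2).sum() + (H**2).sum())

# Simplex ETF embedded in R^K (possible since d = K): equiangular,
# zero-mean unit-norm class vectors. M is symmetric, so W = H = s * M.
M = np.sqrt(K / (K - 1)) * (np.eye(K) - np.ones((K, K)) / K)
s = 3.0                                           # hand-picked scale
best_rand = min(ufm_loss(rng.normal(size=(K, K)), rng.normal(size=(K, K)))
                for _ in range(100))
print(f"ETF: {ufm_loss(s * M, s * M):.4f}   best random of 100: {best_rand:.4f}")
</code></pre>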
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.06109">arXiv:2407.06109</a> <span> [<a href="https://arxiv.org/pdf/2407.06109">pdf</a>, <a href="https://arxiv.org/format/2407.06109">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> PerlDiff: Controllable Street View Synthesis Using Perspective-Layout Diffusion Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Zhang%2C+J">Jinhua Zhang</a>, <a href="/search/?searchtype=author&query=Sheng%2C+H">Hualian Sheng</a>, <a href="/search/?searchtype=author&query=Cai%2C+S">Sijia Cai</a>, <a href="/search/?searchtype=author&query=Deng%2C+B">Bing Deng</a>, <a href="/search/?searchtype=author&query=Liang%2C+Q">Qiao Liang</a>, <a href="/search/?searchtype=author&query=Li%2C+W">Wen Li</a>, <a href="/search/?searchtype=author&query=Fu%2C+Y">Ying Fu</a>, <a href="/search/?searchtype=author&query=Ye%2C+J">Jieping Ye</a>, <a href="/search/?searchtype=author&query=Gu%2C+S">Shuhang Gu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.06109v2-abstract-short" style="display: inline;"> Controllable generation is considered a potentially vital approach to address the challenge of annotating 3D data, and the precision of such controllable generation becomes particularly imperative in the context of data production for autonomous driving. Existing methods focus on the integration of diverse generative information into controlling inputs, utilizing frameworks such as GLIGEN or Contr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.06109v2-abstract-full').style.display = 'inline'; document.getElementById('2407.06109v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.06109v2-abstract-full" style="display: none;"> Controllable generation is considered a potentially vital approach to address the challenge of annotating 3D data, and the precision of such controllable generation becomes particularly imperative in the context of data production for autonomous driving. Existing methods focus on the integration of diverse generative information into controlling inputs, utilizing frameworks such as GLIGEN or ControlNet to produce commendable outcomes in controllable generation. However, such approaches intrinsically restrict generation performance to the learning capacities of predefined network architectures. In this paper, we explore the integration of controlling information and introduce PerlDiff (Perspective-Layout Diffusion Models), a method for effective street view image generation that fully leverages perspective 3D geometric information. Our PerlDiff employs 3D geometric priors to guide the generation of street view images with precise object-level control within the network learning process, resulting in a more robust and controllable output. Moreover, it demonstrates superior controllability compared to alternative layout control methods. Empirical results demonstrate that our PerlDiff markedly enhances the precision of generation on the NuScenes and KITTI datasets.
Our code and models are publicly available at https://github.com/LabShuHangGU/PerlDiff. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.06109v2-abstract-full').style.display = 'none'; document.getElementById('2407.06109v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.03297">arXiv:2407.03297</a> <span> [<a href="https://arxiv.org/pdf/2407.03297">pdf</a>, <a href="https://arxiv.org/format/2407.03297">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Improved Noise Schedule for Diffusion Training </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Hang%2C+T">Tiankai Hang</a>, <a href="/search/?searchtype=author&query=Gu%2C+S">Shuyang Gu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.03297v1-abstract-short" style="display: inline;"> Diffusion models have emerged as the de facto choice for generating visual signals. However, training a single model to predict noise across various levels poses significant challenges, necessitating numerous iterations and incurring substantial computational costs. Various approaches, such as loss weighting strategy design and architectural refinements, have been introduced to expedite convergenc… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.03297v1-abstract-full').style.display = 'inline'; document.getElementById('2407.03297v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.03297v1-abstract-full" style="display: none;"> Diffusion models have emerged as the de facto choice for generating visual signals. However, training a single model to predict noise across various levels poses significant challenges, necessitating numerous iterations and incurring substantial computational costs. Various approaches, such as loss weighting strategy design and architectural refinements, have been introduced to expedite convergence. In this study, we propose a novel approach to design the noise schedule for enhancing the training of diffusion models. Our key insight is that the importance sampling of the logarithm of the Signal-to-Noise ratio (logSNR), theoretically equivalent to a modified noise schedule, is particularly beneficial for training efficiency when increasing the sample frequency around $\log \text{SNR}=0$. We empirically demonstrate the superiority of our noise schedule over the standard cosine schedule. Furthermore, we highlight the advantages of our noise schedule design on the ImageNet benchmark, showing that the designed schedule consistently benefits different prediction targets.
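<p class="is-size-7">The equivalence between importance-sampling $\log \text{SNR}$ and reshaping the noise schedule can be sketched for a generic variance-preserving diffusion model; the Laplace density peaked at $\log \text{SNR}=0$ below is one simple choice of importance distribution, not necessarily the paper's exact schedule:</p> <pre><code>
import numpy as np

rng = np.random.default_rng(0)

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def vp_training_example(x0, loc=0.0, scale=2.0):
    """Draw noisy training inputs for a variance-preserving model by
    sampling logSNR from a density concentrated around 0."""
    lam = rng.laplace(loc, scale, size=x0.shape[0])    # logSNR per example
    alpha = np.sqrt(sigmoid(lam))[:, None]             # alpha^2 + sigma^2 = 1,
    sigma = np.sqrt(sigmoid(-lam))[:, None]            # so logSNR equals lam
    eps = rng.normal(size=x0.shape)
    return alpha * x0 + sigma * eps, eps, lam          # input, target, level

x0 = rng.normal(size=(8, 16))                          # dummy flattened batch
xt, eps, lam = vp_training_example(x0)
print(np.round(lam, 2))                                # levels cluster near 0
</code></pre>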
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.03297v1-abstract-full').style.display = 'none'; document.getElementById('2407.03297v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.03152">arXiv:2407.03152</a> <span> [<a href="https://arxiv.org/pdf/2407.03152">pdf</a>, <a href="https://arxiv.org/format/2407.03152">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Stereo Risk: A Continuous Modeling Approach to Stereo Matching </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Liu%2C+C">Ce Liu</a>, <a href="/search/?searchtype=author&query=Kumar%2C+S">Suryansh Kumar</a>, <a href="/search/?searchtype=author&query=Gu%2C+S">Shuhang Gu</a>, <a href="/search/?searchtype=author&query=Timofte%2C+R">Radu Timofte</a>, <a href="/search/?searchtype=author&query=Yao%2C+Y">Yao Yao</a>, <a href="/search/?searchtype=author&query=Van+Gool%2C+L">Luc Van Gool</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.03152v1-abstract-short" style="display: inline;"> We introduce Stereo Risk, a new deep-learning approach to solve the classical stereo-matching problem in computer vision. Since stereo matching boils down to a per-pixel disparity estimation problem, popular state-of-the-art stereo-matching approaches widely rely on regressing scene disparity values, albeit via discretization. Such discretization o… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.03152v1-abstract-full').style.display = 'inline'; document.getElementById('2407.03152v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.03152v1-abstract-full" style="display: none;"> We introduce Stereo Risk, a new deep-learning approach to solve the classical stereo-matching problem in computer vision. Since stereo matching boils down to a per-pixel disparity estimation problem, popular state-of-the-art stereo-matching approaches widely rely on regressing scene disparity values, albeit via discretization. Such discretization often fails to capture the nuanced, continuous nature of scene depth. Stereo Risk departs from the conventional discretization approach by formulating the scene disparity as an optimal solution to a continuous risk minimization problem, hence the name "stereo risk". We demonstrate that $L^1$ minimization of the proposed continuous risk function enhances stereo-matching performance for deep networks, particularly for disparities with multi-modal probability distributions.
Furthermore, to enable the end-to-end network training of the non-differentiable $L^1$ risk optimization, we exploit the implicit function theorem, ensuring a fully differentiable network. A comprehensive analysis demonstrates our method's theoretical soundness and superior performance over the state-of-the-art methods across various benchmark datasets, including KITTI 2012, KITTI 2015, ETH3D, SceneFlow, and Middlebury 2014. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.03152v1-abstract-full').style.display = 'none'; document.getElementById('2407.03152v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted as an Oral Paper at ICML 2024. Draft info: 18 pages, 6 figures, 16 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.01648">arXiv:2407.01648</a> <span> [<a href="https://arxiv.org/pdf/2407.01648">pdf</a>, <a href="https://arxiv.org/format/2407.01648">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> Aligning Target-Aware Molecule Diffusion Models with Exact Energy Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Gu%2C+S">Siyi Gu</a>, <a href="/search/?searchtype=author&query=Xu%2C+M">Minkai Xu</a>, <a href="/search/?searchtype=author&query=Powers%2C+A">Alexander Powers</a>, <a href="/search/?searchtype=author&query=Nie%2C+W">Weili Nie</a>, <a href="/search/?searchtype=author&query=Geffner%2C+T">Tomas Geffner</a>, <a href="/search/?searchtype=author&query=Kreis%2C+K">Karsten Kreis</a>, <a href="/search/?searchtype=author&query=Leskovec%2C+J">Jure Leskovec</a>, <a href="/search/?searchtype=author&query=Vahdat%2C+A">Arash Vahdat</a>, <a href="/search/?searchtype=author&query=Ermon%2C+S">Stefano Ermon</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.01648v2-abstract-short" style="display: inline;"> Generating ligand molecules for specific protein targets, known as structure-based drug design, is a fundamental problem in therapeutics development and biological discovery. Recently, target-aware generative models, especially diffusion models, have shown great promise in modeling protein-ligand interactions and generating candidate drugs.
However, existing models primarily focus on learning the… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.01648v2-abstract-full').style.display = 'inline'; document.getElementById('2407.01648v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.01648v2-abstract-full" style="display: none;"> Generating ligand molecules for specific protein targets, known as structure-based drug design, is a fundamental problem in therapeutics development and biological discovery. Recently, target-aware generative models, especially diffusion models, have shown great promise in modeling protein-ligand interactions and generating candidate drugs. However, existing models primarily focus on learning the chemical distribution of all drug candidates, which lacks effective steerability on the chemical quality of model generations. In this paper, we propose a novel and general alignment framework to align pretrained target diffusion models with preferred functional properties, named AliDiff. AliDiff shifts the target-conditioned chemical distribution towards regions with higher binding affinity and structural rationality, specified by user-defined reward functions, via the preference optimization approach. To avoid the overfitting problem in common preference optimization objectives, we further develop an improved Exact Energy Preference Optimization method to yield an exact and efficient alignment of the diffusion models, and provide the closed-form expression for the converged distribution. Empirical studies on the CrossDocked2020 benchmark show that AliDiff can generate molecules with state-of-the-art binding energies with up to -7.07 Avg. Vina Score, while maintaining strong molecular properties. Code is available at https://github.com/MinkaiXu/AliDiff. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.01648v2-abstract-full').style.display = 'none'; document.getElementById('2407.01648v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.00136">arXiv:2407.00136</a> <span> [<a href="https://arxiv.org/pdf/2407.00136">pdf</a>, <a href="https://arxiv.org/format/2407.00136">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="High Energy Physics - Experiment">hep-ex</span> </div> </div> <p class="title is-5 mathjax"> Observation of the Electromagnetic Dalitz Transition $h_c \rightarrow e^+e^-η_c$ </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=BESIII+Collaboration"> BESIII Collaboration</a>, <a href="/search/?searchtype=author&query=Ablikim%2C+M">M. Ablikim</a>, <a href="/search/?searchtype=author&query=Achasov%2C+M+N">M. N. Achasov</a>, <a href="/search/?searchtype=author&query=Adlarson%2C+P">P. Adlarson</a>, <a href="/search/?searchtype=author&query=Ahmed%2C+S">S. Ahmed</a>, <a href="/search/?searchtype=author&query=Albrecht%2C+M">M.
Albrecht</a>, <a href="/search/?searchtype=author&query=Aliberti%2C+R">R. Aliberti</a>, <a href="/search/?searchtype=author&query=Amoroso%2C+A">A. Amoroso</a>, <a href="/search/?searchtype=author&query=An%2C+M+R">M. R. An</a>, <a href="/search/?searchtype=author&query=An%2C+Q">Q. An</a>, <a href="/search/?searchtype=author&query=Bai%2C+X+H">X. H. Bai</a>, <a href="/search/?searchtype=author&query=Bai%2C+Y">Y. Bai</a>, <a href="/search/?searchtype=author&query=Bakina%2C+O">O. Bakina</a>, <a href="/search/?searchtype=author&query=Ferroli%2C+R+B">R. Baldini Ferroli</a>, <a href="/search/?searchtype=author&query=Balossino%2C+I">I. Balossino</a>, <a href="/search/?searchtype=author&query=Ban%2C+Y">Y. Ban</a>, <a href="/search/?searchtype=author&query=Begzsuren%2C+K">K. Begzsuren</a>, <a href="/search/?searchtype=author&query=Berger%2C+N">N. Berger</a>, <a href="/search/?searchtype=author&query=Bertani%2C+M">M. Bertani</a>, <a href="/search/?searchtype=author&query=Bettoni%2C+D">D. Bettoni</a>, <a href="/search/?searchtype=author&query=Bianchi%2C+F">F. Bianchi</a>, <a href="/search/?searchtype=author&query=Bloms%2C+J">J. Bloms</a>, <a href="/search/?searchtype=author&query=Bortone%2C+A">A. Bortone</a>, <a href="/search/?searchtype=author&query=Boyko%2C+I">I. Boyko</a>, <a href="/search/?searchtype=author&query=Briere%2C+R+A">R. A. Briere</a> , et al. (495 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.00136v2-abstract-short" style="display: inline;"> Using $(27.12\pm 0.14)\times10^8$ $ψ(3686)$ decays and data samples of $e^+e^-$ collisions with $\sqrt{s}$ from 4.130 to 4.780~GeV collected with the BESIII detector, we report the first observation of the electromagnetic Dalitz transition $h_c\to e^+e^-η_c$ with a statistical significance of $5.4σ$. We measure the ratio of the branching fractions… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.00136v2-abstract-full').style.display = 'inline'; document.getElementById('2407.00136v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.00136v2-abstract-full" style="display: none;"> Using $(27.12\pm 0.14)\times10^8$ $ψ(3686)$ decays and data samples of $e^+e^-$ collisions with $\sqrt{s}$ from 4.130 to 4.780~GeV collected with the BESIII detector, we report the first observation of the electromagnetic Dalitz transition $h_c\to e^+e^-η_c$ with a statistical significance of $5.4σ$. We measure the ratio of the branching fractions $\frac{\mathcal{B}(h_c\rightarrow e^+e^-η_c)}{\mathcal{B}(h_c\rightarrow γη_c)}$ separately for the $h_c$ samples produced via $ψ(3686)\toπ^0h_c$ and $e^+e^-\toπ^+π^-h_c$. The average ratio is determined to be $(0.59\pm0.10(\text{stat.})\pm0.04(\text{syst.}))\%$, where the uncertainty includes both statistical and systematic components.
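<p class="is-size-7">Combining the two quoted components in quadrature, as is standard when they are independent, gives a total uncertainty of $\sqrt{0.10^2+0.04^2}\approx0.11$, i.e. an overall result of $(0.59\pm0.11)\%$.</p>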
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.00136v2-abstract-full').style.display = 'none'; document.getElementById('2407.00136v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.08392">arXiv:2406.08392</a> <span> [<a href="https://arxiv.org/pdf/2406.08392">pdf</a>, <a href="https://arxiv.org/format/2406.08392">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> FontStudio: Shape-Adaptive Diffusion Model for Coherent and Consistent Font Effect Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Mu%2C+X">Xinzhi Mu</a>, <a href="/search/?searchtype=author&query=Chen%2C+L">Li Chen</a>, <a href="/search/?searchtype=author&query=Chen%2C+B">Bohan Chen</a>, <a href="/search/?searchtype=author&query=Gu%2C+S">Shuyang Gu</a>, <a href="/search/?searchtype=author&query=Bao%2C+J">Jianmin Bao</a>, <a href="/search/?searchtype=author&query=Chen%2C+D">Dong Chen</a>, <a href="/search/?searchtype=author&query=Li%2C+J">Ji Li</a>, <a href="/search/?searchtype=author&query=Yuan%2C+Y">Yuhui Yuan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.08392v1-abstract-short" style="display: inline;"> Recently, the application of modern diffusion-based text-to-image generation models for creating artistic fonts, traditionally the domain of professional designers, has garnered significant interest. Diverging from the majority of existing studies that concentrate on generating artistic typography, our research aims to tackle a novel and more demanding challenge: the generation of text effects for… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.08392v1-abstract-full').style.display = 'inline'; document.getElementById('2406.08392v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.08392v1-abstract-full" style="display: none;"> Recently, the application of modern diffusion-based text-to-image generation models for creating artistic fonts, traditionally the domain of professional designers, has garnered significant interest. Diverging from the majority of existing studies that concentrate on generating artistic typography, our research aims to tackle a novel and more demanding challenge: the generation of text effects for multilingual fonts. This task essentially requires generating coherent and consistent visual content within the confines of a font-shaped canvas, as opposed to a traditional rectangular canvas. To address this task, we introduce a novel shape-adaptive diffusion model capable of interpreting the given shape and strategically planning pixel distributions within the irregular canvas. 
To achieve this, we curate a high-quality shape-adaptive image-text dataset and incorporate the segmentation mask as a visual condition to steer the image generation process within the irregular canvas. This approach enables the traditionally rectangular-canvas-based diffusion model to produce the desired concepts in accordance with the provided geometric shapes. Second, to maintain consistency across multiple letters, we also present a training-free, shape-adaptive effect transfer method for transferring textures from a generated reference letter to others. The key insights are building a font effect noise prior and propagating the font effect information in a concatenated latent space. The efficacy of our FontStudio system is confirmed through user preference studies, which show a marked preference (78% win-rates on aesthetics) for our system even when compared to the latest unrivaled commercial product, Adobe Firefly. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.08392v1-abstract-full').style.display = 'none'; document.getElementById('2406.08392v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project-page: https://font-studio.github.io/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.04314">arXiv:2406.04314</a> <span> [<a href="https://arxiv.org/pdf/2406.04314">pdf</a>, <a href="https://arxiv.org/format/2406.04314">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Step-aware Preference Optimization: Aligning Preference with Denoising Performance at Each Step </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Liang%2C+Z">Zhanhao Liang</a>, <a href="/search/?searchtype=author&query=Yuan%2C+Y">Yuhui Yuan</a>, <a href="/search/?searchtype=author&query=Gu%2C+S">Shuyang Gu</a>, <a href="/search/?searchtype=author&query=Chen%2C+B">Bohan Chen</a>, <a href="/search/?searchtype=author&query=Hang%2C+T">Tiankai Hang</a>, <a href="/search/?searchtype=author&query=Li%2C+J">Ji Li</a>, <a href="/search/?searchtype=author&query=Zheng%2C+L">Liang Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.04314v1-abstract-short" style="display: inline;"> Recently, Direct Preference Optimization (DPO) has extended its success from aligning large language models (LLMs) to aligning text-to-image diffusion models with human preferences.
Unlike most existing DPO methods that assume all diffusion steps share a consistent preference order with the final generated images, we argue that this assumption neglects step-specific denoising performance and that… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.04314v1-abstract-full').style.display = 'inline'; document.getElementById('2406.04314v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.04314v1-abstract-full" style="display: none;"> Recently, Direct Preference Optimization (DPO) has extended its success from aligning large language models (LLMs) to aligning text-to-image diffusion models with human preferences. Unlike most existing DPO methods that assume all diffusion steps share a consistent preference order with the final generated images, we argue that this assumption neglects step-specific denoising performance and that preference labels should be tailored to each step's contribution. To address this limitation, we propose Step-aware Preference Optimization (SPO), a novel post-training approach that independently evaluates and adjusts the denoising performance at each step, using a step-aware preference model and a step-wise resampler to ensure accurate step-aware supervision. Specifically, at each denoising step, we sample a pool of images, find a suitable win-lose pair, and, most importantly, randomly select a single image from the pool to initialize the next denoising step. This step-wise resampler process ensures the next win-lose image pair comes from the same image, making the win-lose comparison independent of the previous step. To assess the preferences at each step, we train a separate step-aware preference model that can be applied to both noisy and clean images. Our experiments with Stable Diffusion v1.5 and SDXL demonstrate that SPO significantly outperforms the latest Diffusion-DPO in aligning generated images with complex, detailed prompts and enhancing aesthetics, while also achieving more than 20x faster training. Code and model: https://rockeycoss.github.io/spo.github.io/ <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.04314v1-abstract-full').style.display = 'none'; document.getElementById('2406.04314v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024.
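<p class="is-size-7">A minimal runnable sketch of the step-wise resampling loop described above, with toy numeric stand-ins for the diffusion model, the step-aware preference model, and the DPO-style update (placeholder functions, not the authors' code):</p> <pre><code>
import random
random.seed(0)

# Toy stand-ins so the control flow runs; in the real method these are a
# diffusion denoising step, a learned step-aware preference model, and a
# DPO-style parameter update on the win/lose pair.
def denoise_step(x, t):
    return x * 0.9 + random.gauss(0.0, 0.1)

def step_preference(x, t):
    return -abs(x)                 # toy scorer: prefer samples near 0

def dpo_update(win, lose, t):
    pass                           # placeholder for the gradient step

def spo_trajectory(T=10, pool_size=4):
    x = random.gauss(0.0, 1.0)     # "initial noise"
    for t in reversed(range(T)):
        pool = [denoise_step(x, t) for _ in range(pool_size)]
        scores = [step_preference(c, t) for c in pool]
        win = pool[scores.index(max(scores))]
        lose = pool[scores.index(min(scores))]
        dpo_update(win, lose, t)   # supervision local to this step
        x = random.choice(pool)    # step-wise resampler: the next step
                                   # starts from ONE random pool member,
                                   # decoupling it from this comparison
    return x

print(spo_trajectory())
</code></pre>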
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.02147">arXiv:2406.02147</a> <span> [<a href="https://arxiv.org/pdf/2406.02147">pdf</a>, <a href="https://arxiv.org/format/2406.02147">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> UA-Track: Uncertainty-Aware End-to-End 3D Multi-Object Tracking </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Zhou%2C+L">Lijun Zhou</a>, <a href="/search/?searchtype=author&query=Tang%2C+T">Tao Tang</a>, <a href="/search/?searchtype=author&query=Hao%2C+P">Pengkun Hao</a>, <a href="/search/?searchtype=author&query=He%2C+Z">Zihang He</a>, <a href="/search/?searchtype=author&query=Ho%2C+K">Kalok Ho</a>, <a href="/search/?searchtype=author&query=Gu%2C+S">Shuo Gu</a>, <a href="/search/?searchtype=author&query=Hou%2C+W">Wenbo Hou</a>, <a href="/search/?searchtype=author&query=Hao%2C+Z">Zhihui Hao</a>, <a href="/search/?searchtype=author&query=Sun%2C+H">Haiyang Sun</a>, <a href="/search/?searchtype=author&query=Zhan%2C+K">Kun Zhan</a>, <a href="/search/?searchtype=author&query=Jia%2C+P">Peng Jia</a>, <a href="/search/?searchtype=author&query=Lang%2C+X">Xianpeng Lang</a>, <a href="/search/?searchtype=author&query=Liang%2C+X">Xiaodan Liang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.02147v1-abstract-short" style="display: inline;"> 3D multiple object tracking (MOT) plays a crucial role in autonomous driving perception. Recent end-to-end query-based trackers simultaneously detect and track objects, which have shown promising potential for the 3D MOT task. However, existing methods overlook the uncertainty issue, which refers to the lack of precise confidence about the state and location of tracked objects. Uncertainty arises… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.02147v1-abstract-full').style.display = 'inline'; document.getElementById('2406.02147v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.02147v1-abstract-full" style="display: none;"> 3D multiple object tracking (MOT) plays a crucial role in autonomous driving perception. Recent end-to-end query-based trackers simultaneously detect and track objects, which have shown promising potential for the 3D MOT task. However, existing methods overlook the uncertainty issue, which refers to the lack of precise confidence about the state and location of tracked objects. Uncertainty arises owing to various factors during motion observation by cameras, especially occlusions and the small size of target objects, resulting in an inaccurate estimation of the object's position, label, and identity. To this end, we propose an Uncertainty-Aware 3D MOT framework, UA-Track, which tackles the uncertainty problem from multiple aspects. Specifically, we first introduce an Uncertainty-aware Probabilistic Decoder to capture the uncertainty in object prediction with probabilistic attention. Secondly, we propose an Uncertainty-guided Query Denoising strategy to further enhance the training process. 
We also utilize Uncertainty-reduced Query Initialization, which leverages predicted 2D object location and depth information to reduce query uncertainty. As a result, our UA-Track achieves state-of-the-art performance on the nuScenes benchmark, i.e., 66.3% AMOTA on the test split, surpassing the previous best end-to-end solution by a significant margin of 8.9% AMOTA. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.02147v1-abstract-full').style.display = 'none'; document.getElementById('2406.02147v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.20860">arXiv:2405.20860</a> <span> [<a href="https://arxiv.org/pdf/2405.20860">pdf</a>, <a href="https://arxiv.org/format/2405.20860">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Enhancing Efficiency of Safe Reinforcement Learning via Sample Manipulation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Gu%2C+S">Shangding Gu</a>, <a href="/search/?searchtype=author&query=Shi%2C+L">Laixi Shi</a>, <a href="/search/?searchtype=author&query=Ding%2C+Y">Yuhao Ding</a>, <a href="/search/?searchtype=author&query=Knoll%2C+A">Alois Knoll</a>, <a href="/search/?searchtype=author&query=Spanos%2C+C">Costas Spanos</a>, <a href="/search/?searchtype=author&query=Wierman%2C+A">Adam Wierman</a>, <a href="/search/?searchtype=author&query=Jin%2C+M">Ming Jin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.20860v1-abstract-short" style="display: inline;"> Safe reinforcement learning (RL) is crucial for deploying RL agents in real-world applications, as it aims to maximize long-term rewards while satisfying safety constraints. However, safe RL often suffers from sample inefficiency, requiring extensive interactions with the environment to learn a safe policy. We propose Efficient Safe Policy Optimization (ESPO), a novel approach that enhances the ef… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.20860v1-abstract-full').style.display = 'inline'; document.getElementById('2405.20860v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.20860v1-abstract-full" style="display: none;"> Safe reinforcement learning (RL) is crucial for deploying RL agents in real-world applications, as it aims to maximize long-term rewards while satisfying safety constraints. However, safe RL often suffers from sample inefficiency, requiring extensive interactions with the environment to learn a safe policy. We propose Efficient Safe Policy Optimization (ESPO), a novel approach that enhances the efficiency of safe RL through sample manipulation. ESPO employs an optimization framework with three modes: maximizing rewards, minimizing costs, and balancing the trade-off between the two. 
By dynamically adjusting the sampling process based on the observed conflict between reward and safety gradients, ESPO theoretically guarantees convergence, optimization stability, and improved sample complexity bounds. Experiments on the Safety-MuJoCo and Omnisafe benchmarks demonstrate that ESPO significantly outperforms existing primal-based and primal-dual-based baselines in terms of reward maximization and constraint satisfaction. Moreover, ESPO achieves substantial gains in sample efficiency, requiring 25--29% fewer samples than baselines, and reducing training time by 21--38%. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.20860v1-abstract-full').style.display = 'none'; document.getElementById('2405.20860v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.18209">arXiv:2405.18209</a> <span> [<a href="https://arxiv.org/pdf/2405.18209">pdf</a>, <a href="https://arxiv.org/format/2405.18209">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Safe Multi-Agent Reinforcement Learning with Bilevel Optimization in Autonomous Driving </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Zheng%2C+Z">Zhi Zheng</a>, <a href="/search/?searchtype=author&query=Gu%2C+S">Shangding Gu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.18209v1-abstract-short" style="display: inline;"> Ensuring safety in MARL, particularly when deploying it in real-world applications such as autonomous driving, emerges as a critical challenge. To address this challenge, traditional safe MARL methods extend MARL approaches to incorporate safety considerations, aiming to minimize safety risk values. However, these safe MARL algorithms often fail to model other agents and lack convergence guarantee… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.18209v1-abstract-full').style.display = 'inline'; document.getElementById('2405.18209v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.18209v1-abstract-full" style="display: none;"> Ensuring safety in MARL, particularly when deploying it in real-world applications such as autonomous driving, emerges as a critical challenge. To address this challenge, traditional safe MARL methods extend MARL approaches to incorporate safety considerations, aiming to minimize safety risk values. However, these safe MARL algorithms often fail to model other agents and lack convergence guarantees, particularly in dynamically complex environments. In this study, we propose a safe MARL method grounded in a Stackelberg model with bi-level optimization, for which convergence analysis is provided.
Derived from our theoretical analysis, we develop two practical algorithms, namely Constrained Stackelberg Q-learning (CSQ) and Constrained Stackelberg Multi-Agent Deep Deterministic Policy Gradient (CS-MADDPG), designed to facilitate MARL decision-making in autonomous driving applications. To evaluate the effectiveness of our algorithms, we developed a safe MARL autonomous driving benchmark and conducted experiments on challenging autonomous driving scenarios, such as merges, roundabouts, intersections, and racetracks. The experimental results indicate that our algorithms, CSQ and CS-MADDPG, outperform several strong MARL baselines, such as Bi-AC, MACPO, and MAPPO-L, regarding reward and safety performance. The demos and source code are available at https://github.com/SafeRL-Lab/Safe-MARL-in-Autonomous-Driving.git. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.18209v1-abstract-full').style.display = 'none'; document.getElementById('2405.18209v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.16414">arXiv:2405.16414</a> <span> [<a href="https://arxiv.org/pdf/2405.16414">pdf</a>, <a href="https://arxiv.org/format/2405.16414">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Robust Message Embedding via Attention Flow-Based Steganography </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Ye%2C+H">Huayuan Ye</a>, <a href="/search/?searchtype=author&query=Zhang%2C+S">Shenzhuo Zhang</a>, <a href="/search/?searchtype=author&query=Jiang%2C+S">Shiqi Jiang</a>, <a href="/search/?searchtype=author&query=Liao%2C+J">Jing Liao</a>, <a href="/search/?searchtype=author&query=Gu%2C+S">Shuhang Gu</a>, <a href="/search/?searchtype=author&query=Zheng%2C+D">Dejun Zheng</a>, <a href="/search/?searchtype=author&query=Wang%2C+C">Changbo Wang</a>, <a href="/search/?searchtype=author&query=Li%2C+C">Chenhui Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.16414v2-abstract-short" style="display: inline;"> Image steganography can hide information in a host image and obtain a stego image that is perceptually indistinguishable from the original one. This technique has tremendous potential in scenarios like copyright protection, information retrospection, etc. Some previous studies have proposed to enhance the robustness of the methods against image disturbances to increase their applicability.
However… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.16414v2-abstract-full').style.display = 'inline'; document.getElementById('2405.16414v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.16414v2-abstract-full" style="display: none;"> Image steganography can hide information in a host image and obtain a stego image that is perceptually indistinguishable from the original one. This technique has tremendous potential in scenarios like copyright protection, information retrospection, etc. Some previous studies have proposed to enhance the robustness of the methods against image disturbances to increase their applicability. However, they generally cannot achieve a satisfactory balance between the steganography quality and robustness. Instead of image-in-image steganography, we focus on the issue of message-in-image embedding that is robust to various real-world image distortions. This task aims to embed information into a natural image and the decoding result is required to be completely accurate, which increases the difficulty of data concealing and revealing. Inspired by the recent developments in transformer-based vision models, we discover that the tokenized representation of an image is naturally suitable for the steganography task. In this paper, we propose a novel message embedding framework, called Robust Message Steganography (RMSteg), which is competent to hide a message via QR Code in a host image based on a normalizing flow-based model. The stego image derived by our method has imperceptible changes and the encoded message can be accurately restored even if the image is printed out and photographed. To the best of our knowledge, this is the first work that integrates the advantages of transformer models into normalizing flows. Our experimental results show that RMSteg has great potential for robust and high-quality message embedding. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.16414v2-abstract-full').style.display = 'none'; document.getElementById('2405.16414v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024.
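<p class="is-size-7">For contrast, the embed/reveal contract of message-in-image steganography can be demonstrated with the most naive scheme, redundant least-significant-bit coding; this baseline sketch recovers the message exactly on a clean image but is precisely the kind of method that fails under the print-and-photograph distortions RMSteg targets:</p> <pre><code>
import numpy as np

rng = np.random.default_rng(0)

def embed(img, bits, rep=64):
    """Write each message bit into the least-significant bit of rep pixels."""
    flat = img.flatten()                          # flatten() returns a copy
    coded = np.repeat(bits, rep)                  # redundancy for robustness
    flat[: coded.size] = (flat[: coded.size] & 0xFE) | coded
    return flat.reshape(img.shape)

def reveal(img, n_bits, rep=64):
    lsb = img.flatten()[: n_bits * rep] & 1
    return (lsb.reshape(n_bits, rep).mean(axis=1) > 0.5).astype(np.uint8)

img = rng.integers(0, 256, size=(64, 64), dtype=np.uint8)   # host "image"
bits = rng.integers(0, 2, size=32, dtype=np.uint8)          # e.g. QR payload
stego = embed(img, bits)
print(np.array_equal(reveal(stego, 32), bits))              # True: exact recovery
</code></pre>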
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 content pages, 16 appendix pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.16390">arXiv:2405.16390</a> <span> [<a href="https://arxiv.org/pdf/2405.16390">pdf</a>, <a href="https://arxiv.org/format/2405.16390">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Safe and Balanced: A Framework for Constrained Multi-Objective Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Gu%2C+S">Shangding Gu</a>, <a href="/search/?searchtype=author&query=Sel%2C+B">Bilgehan Sel</a>, <a href="/search/?searchtype=author&query=Ding%2C+Y">Yuhao Ding</a>, <a href="/search/?searchtype=author&query=Wang%2C+L">Lu Wang</a>, <a href="/search/?searchtype=author&query=Lin%2C+Q">Qingwei Lin</a>, <a href="/search/?searchtype=author&query=Knoll%2C+A">Alois Knoll</a>, <a href="/search/?searchtype=author&query=Jin%2C+M">Ming Jin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.16390v1-abstract-short" style="display: inline;"> In numerous reinforcement learning (RL) problems involving safety-critical systems, a key challenge lies in balancing multiple objectives while simultaneously meeting all stringent safety constraints. To tackle this issue, we propose a primal-based framework that orchestrates policy optimization between multi-objective learning and constraint adherence. Our approach employs a novel natural policy gr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.16390v1-abstract-full').style.display = 'inline'; document.getElementById('2405.16390v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.16390v1-abstract-full" style="display: none;"> In numerous reinforcement learning (RL) problems involving safety-critical systems, a key challenge lies in balancing multiple objectives while simultaneously meeting all stringent safety constraints. To tackle this issue, we propose a primal-based framework that orchestrates policy optimization between multi-objective learning and constraint adherence. Our approach employs a novel natural policy gradient manipulation method to optimize multiple RL objectives and overcome conflicting gradients between different tasks, since a simple weighted-average gradient direction may not benefit specific tasks' performance due to misaligned gradients of different task objectives. When there is a violation of a hard constraint, our algorithm steps in to rectify the policy to minimize this violation. We establish theoretical convergence and constraint violation guarantees in a tabular setting. Empirically, our proposed method also outperforms prior state-of-the-art methods on challenging safe multi-objective reinforcement learning tasks.
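<p class="is-size-7">A well-known instance of such gradient manipulation is PCGrad-style conflict projection, sketched below as a stand-in for the idea (it is not the paper's natural-policy-gradient variant): each task gradient is stripped of the component that points against another task's gradient before averaging.</p> <pre><code>
import numpy as np

def combine_gradients(grads):
    """PCGrad-style conflict resolution: project each task gradient off any
    direction it conflicts with, then average the adjusted gradients."""
    out = []
    for i, g in enumerate(grads):
        g = g.astype(float).copy()
        for j, h in enumerate(grads):
            if i == j:
                continue
            # min(., 0) keeps the projection only when the dot product is
            # negative, i.e. only when the two gradients actually conflict
            g = g - min(g @ h, 0.0) / (h @ h) * h
        out.append(g)
    return np.mean(out, axis=0)

g_reward = np.array([1.0, 0.0])
g_safety = np.array([-0.6, 0.8])   # partially conflicts with the reward gradient
print(combine_gradients([g_reward, g_safety]))
</code></pre>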

arXiv:2405.16256 (https://arxiv.org/abs/2405.16256) [pdf, other] cs.DC (Distributed, Parallel, and Cluster Computing); cs.AI (Artificial Intelligence)
HETHUB: A Distributed Training System with Heterogeneous Cluster for Large-Scale Models
Authors: Si Xu, Zixiao Huang, Yan Zeng, Shengen Yan, Xuefei Ning, Quanlu Zhang, Haolin Ye, Sipei Gu, Chunsheng Shui, Zhezheng Lin, Hao Zhang, Sheng Wang, Guohao Dai, Yu Wang
Abstract: Training large-scale models relies on a vast number of computing resources. For example, training the GPT-4 model (1.8 trillion parameters) requires 25,000 A100 GPUs. It is a challenge to build a large-scale cluster with one type of GPU-accelerator, and using multiple types of GPU-accelerators to construct a cluster is an effective way to solve the problem of insufficient homogeneous GPU-accelerators. However, existing distributed training systems for large-scale models support only homogeneous GPU-accelerators, not heterogeneous ones. To address this problem, this paper proposes HETHUB, a distributed training system with hybrid parallelism for large-scale models that supports heterogeneous clusters, including AMD, Nvidia, and other types of GPU-accelerators. It introduces a distributed unified communicator to realize communication between heterogeneous GPU-accelerators, a distributed performance predictor, and an automatic parallel planner to develop and train models efficiently with heterogeneous GPU-accelerators. Compared to distributed training systems with homogeneous GPU-accelerators, our system supports six combinations of heterogeneous GPU-accelerators. We train the Llama-140B model on a heterogeneous cluster with 768 GPU-accelerators (128 AMD and 640 of GPU-accelerator A). The experimental results show that the optimal performance of our system on the heterogeneous cluster reaches up to 97.49% of the theoretical upper-bound performance.
Submitted 8 August, 2024; v1 submitted 25 May, 2024; originally announced May 2024.
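
A "distributed unified communicator" of the kind described above plausibly exposes one collective-communication interface and dispatches to vendor libraries underneath. Every class and function name below is hypothetical illustration, not HETHUB's code.

    from abc import ABC, abstractmethod

    class Communicator(ABC):
        # Hypothetical unified interface over vendor collective libraries.
        @abstractmethod
        def all_reduce(self, tensor): ...

    class NvidiaComm(Communicator):
        def all_reduce(self, tensor):
            ...  # would delegate to NCCL on Nvidia GPUs

    class AmdComm(Communicator):
        def all_reduce(self, tensor):
            ...  # would delegate to RCCL on AMD GPUs

    def get_communicator(vendor: str) -> Communicator:
        # One dispatch point hides the hardware difference from the trainer.
        return {"nvidia": NvidiaComm, "amd": AmdComm}[vendor]()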

arXiv:2405.06001 (https://arxiv.org/abs/2405.06001) [pdf, other] cs.LG (Machine Learning); cs.AI (Artificial Intelligence); cs.CL (Computation and Language)
LLMC: Benchmarking Large Language Model Quantization with a Versatile Compression Toolkit
Authors: Ruihao Gong, Yang Yong, Shiqiao Gu, Yushi Huang, Chengtao Lv, Yunchen Zhang, Xianglong Liu, Dacheng Tao
Abstract: Recent advancements in large language models (LLMs) are propelling us toward artificial general intelligence with their remarkable emergent abilities and reasoning capabilities. However, their substantial computational and memory requirements limit widespread adoption. Quantization, a key compression technique, can effectively mitigate these demands by compressing and accelerating LLMs, albeit with potential risks to accuracy. Numerous studies have aimed to minimize the accuracy loss associated with quantization, but their quantization configurations vary and cannot be fairly compared. In this paper, we present LLMC, a plug-and-play compression toolkit, to fairly and systematically explore the impact of quantization. LLMC integrates dozens of algorithms, models, and hardware platforms, offering high extensibility from integer to floating-point quantization, from LLMs to vision-language models (VLMs), from fixed-bit to mixed precision, and from quantization to sparsification. Powered by this versatile toolkit, our benchmark covers three key aspects: calibration data, algorithms (three strategies), and data formats, providing novel insights and detailed analyses for further research and practical guidance for users. Our toolkit is available at https://github.com/ModelTC/llmc.
Submitted 9 October, 2024; v1 submitted 9 May, 2024; originally announced May 2024.
Comments: Accepted by EMNLP 2024 Industry Track
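
As background for what such a toolkit automates, the simplest quantization scheme, symmetric absmax INT8, fits in a few lines; the algorithms LLMC integrates are far more sophisticated, so treat this purely as a baseline sketch.

    import numpy as np

    def quantize_int8(w):
        # Symmetric absmax quantization: map weights into [-127, 127]
        # with a single per-tensor scale.
        scale = np.abs(w).max() / 127.0
        q = np.clip(np.round(w / scale), -127, 127).astype(np.int8)
        return q, scale

    def dequantize(q, scale):
        return q.astype(np.float32) * scale

    w = np.random.randn(4, 4).astype(np.float32)
    q, s = quantize_int8(w)
    err = np.abs(dequantize(q, s) - w).max()  # small reconstruction error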

arXiv:2405.01677 (https://arxiv.org/abs/2405.01677) [pdf, other] cs.LG (Machine Learning); cs.AI (Artificial Intelligence)
Balance Reward and Safety Optimization for Safe Reinforcement Learning: A Perspective of Gradient Manipulation
Authors: Shangding Gu, Bilgehan Sel, Yuhao Ding, Lu Wang, Qingwei Lin, Ming Jin, Alois Knoll
Abstract: Ensuring the safety of Reinforcement Learning (RL) is crucial for its deployment in real-world applications. Nevertheless, managing the trade-off between reward and safety during exploration presents a significant challenge. Improving reward performance through policy adjustments may adversely affect safety performance. In this study, we aim to address this conflicting relation by leveraging the theory of gradient manipulation. Initially, we analyze the conflict between reward and safety gradients. Subsequently, we tackle the balance between reward and safety optimization by proposing a soft switching policy optimization method, for which we provide convergence analysis. Based on our theoretical examination, we provide a safe RL framework to overcome the aforementioned challenge, and we develop a Safety-MuJoCo Benchmark to assess the performance of safe RL algorithms. Finally, we evaluate the effectiveness of our method on the Safety-MuJoCo Benchmark and a popular safe RL benchmark, Omnisafe. Experimental results demonstrate that our algorithms outperform several state-of-the-art baselines in terms of balancing reward and safety optimization.
Submitted 7 June, 2024; v1 submitted 2 May, 2024; originally announced May 2024.
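
One way to picture a "soft switching" between reward and safety objectives is a violation-dependent blend of the two gradients. The sketch below is an illustrative caricature under assumed names (cost, budget, and the sigmoid gate); it is not the paper's analyzed update rule.

    import numpy as np

    def soft_switch_step(theta, g_reward, g_cost, cost, budget,
                         lr=1e-2, k=10.0):
        # The sigmoid gate w moves toward 1 (pure safety correction) as the
        # measured cost exceeds the budget, and toward 0 (pure reward
        # ascent) when the constraint is comfortably satisfied.
        w = 1.0 / (1.0 + np.exp(-k * (cost - budget)))
        direction = (1.0 - w) * g_reward - w * g_cost  # descend the cost
        return theta + lr * direction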

arXiv:2404.14248 (https://arxiv.org/abs/2404.14248) [pdf, other] cs.CV (Computer Vision and Pattern Recognition)
NTIRE 2024 Challenge on Low Light Image Enhancement: Methods and Results
Authors: Xiaoning Liu, Zongwei Wu, Ao Li, Florin-Alexandru Vasluianu, Yulun Zhang, Shuhang Gu, Le Zhang, Ce Zhu, Radu Timofte, Zhi Jin, Hongjun Wu, Chenxi Wang, Haitao Ling, Yuanhao Cai, Hao Bian, Yuxin Zheng, Jing Lin, Alan Yuille, Ben Shao, Jin Guo, Tianli Liu, Mohao Wu, Yixu Feng, Shuo Hou, Haotian Lin, et al. (87 additional authors not shown)
Abstract: This paper reviews the NTIRE 2024 low light image enhancement challenge, highlighting the proposed solutions and results. The aim of this challenge is to discover an effective network design or solution capable of generating brighter, clearer, and visually appealing results when dealing with a variety of conditions, including ultra-high resolution (4K and beyond), non-uniform illumination, backlighting, extreme darkness, and night scenes. A notable total of 428 participants registered for the challenge, with 22 teams ultimately making valid submissions. This paper meticulously evaluates the state-of-the-art advancements in enhancing low-light images, reflecting the significant progress and creativity in this field.
Submitted 22 April, 2024; originally announced April 2024.
Comments: NTIRE 2024 Challenge Report

arXiv:2404.06777 (https://arxiv.org/abs/2404.06777) [pdf, other] cs.NI (Networking and Internet Architecture)
Responsible Federated Learning in Smart Transportation: Outlooks and Challenges
Authors: Xiaowen Huang, Tao Huang, Shushi Gu, Shuguang Zhao, Guanglin Zhang
Abstract: Integrating artificial intelligence (AI) and federated learning (FL) in smart transportation has raised critical issues regarding their responsible use. Ensuring responsible AI is paramount for the stability and sustainability of intelligent transportation systems. Despite its importance, research on the responsible application of AI and FL in this domain remains nascent, with a paucity of in-depth investigations into their confluence. Our study analyzes the roles of FL in smart transportation, as well as the promoting effect of responsible AI on distributed smart transportation. Lastly, we discuss the challenges of developing and implementing responsible FL in smart transportation and propose potential solutions. By integrating responsible AI and federated learning, intelligent transportation systems are expected to achieve a higher degree of intelligence, personalization, safety, and transparency.
Submitted 10 April, 2024; originally announced April 2024.

arXiv:2403.17421 (https://arxiv.org/abs/2403.17421) [pdf, other] cs.IR (Information Retrieval); cs.AI (Artificial Intelligence)
MA4DIV: Multi-Agent Reinforcement Learning for Search Result Diversification
Authors: Yiqun Chen, Jiaxin Mao, Yi Zhang, Dehong Ma, Long Xia, Jun Fan, Daiting Shi, Zhicong Cheng, Simiu Gu, Dawei Yin
Abstract: The objective of search result diversification (SRD) is to ensure that selected documents cover as many different subtopics as possible. Existing methods primarily utilize a paradigm of "greedy selection", i.e., selecting one document with the highest diversity score at a time. These approaches tend to be inefficient and are easily trapped in a suboptimal state. In addition, some other methods aim to approximately optimize a diversity metric such as $\alpha$-NDCG, but the results still remain suboptimal. To address these challenges, we introduce Multi-Agent reinforcement learning (MARL) for search result DIVersification, which we call MA4DIV. In this approach, each document is an agent, and search result diversification is modeled as a cooperative task among multiple agents. This approach allows diversity metrics such as $\alpha$-NDCG to be optimized directly while achieving high training efficiency. We conducted preliminary experiments on public TREC datasets to demonstrate the effectiveness and potential of MA4DIV. Considering the limited number of queries in the public TREC datasets, we also construct a large-scale dataset from industry sources and show that MA4DIV achieves substantial improvements in both effectiveness and efficiency over existing baselines on this industrial-scale dataset.
Submitted 27 March, 2024; v1 submitted 26 March, 2024; originally announced March 2024.
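
The diversity metric mentioned above, $\alpha$-NDCG, discounts a subtopic's gain each time it is re-covered lower in the ranking. A minimal implementation of the $\alpha$-DCG numerator follows; normalization by an ideal ranking, usually computed greedily, is omitted.

    import math

    def alpha_dcg(ranking, doc_subtopics, alpha=0.5, k=10):
        # alpha-DCG: a document's gain at each rank is the sum over its
        # subtopics of (1 - alpha) raised to the number of earlier
        # documents already covering that subtopic.
        seen = {}  # subtopic -> times covered so far
        score = 0.0
        for rank, doc in enumerate(ranking[:k], start=1):
            gain = sum((1 - alpha) ** seen.get(s, 0)
                       for s in doc_subtopics[doc])
            score += gain / math.log2(rank + 1)
            for s in doc_subtopics[doc]:
                seen[s] = seen.get(s, 0) + 1
        return score

    subtopics = {"d1": {1, 2}, "d2": {1}, "d3": {3}}
    print(alpha_dcg(["d1", "d2", "d3"], subtopics))  # redundant d2 is discounted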

arXiv:2403.16499 (https://arxiv.org/abs/2403.16499) [pdf, other] cs.CV (Computer Vision and Pattern Recognition); doi: 10.1016/j.media.2024.103151 (https://doi.org/10.1016/j.media.2024.103151)
Self-Supervised Learning for Medical Image Data with Anatomy-Oriented Imaging Planes
Authors: Tianwei Zhang, Dong Wei, Mengmeng Zhu, Shi Gu, Yefeng Zheng
Abstract: Self-supervised learning has emerged as a powerful tool for pretraining deep networks on unlabeled data, prior to transfer learning of target tasks with limited annotation. The relevance between the pretraining pretext and target tasks is crucial to the success of transfer learning. Various pretext tasks have been proposed to utilize properties of medical image data (e.g., three-dimensionality); such tasks are more relevant to medical image analysis than generic ones designed for natural images. However, previous work has rarely paid attention to data with anatomy-oriented imaging planes, e.g., standard cardiac magnetic resonance imaging views. As these imaging planes are defined according to the anatomy of the imaged organ, pretext tasks that effectively exploit this information can pretrain networks to gain knowledge of the organ of interest. In this work, we propose two complementary pretext tasks for this group of medical image data, based on the spatial relationships of the imaging planes. The first learns the relative orientation between the imaging planes and is implemented as regressing their intersecting lines. The second exploits parallel imaging planes to regress their relative slice locations within a stack. Both pretext tasks are conceptually straightforward and easy to implement, and they can be combined in multitask learning for better representation learning. Thorough experiments on two anatomical structures (heart and knee) and representative target tasks (semantic segmentation and classification) demonstrate that the proposed pretext tasks are effective in pretraining deep networks for remarkably boosted performance on the target tasks, and superior to other recent approaches.
Submitted 7 April, 2024; v1 submitted 25 March, 2024; originally announced March 2024.
Comments: Medical Image Analysis
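
The regression target of the first pretext task, the line where two imaging planes intersect, is plain 3D geometry. The sketch below computes that line from plane normals and points purely to illustrate the target; it says nothing about the authors' network or loss.

    import numpy as np

    def plane_intersection(n1, p1, n2, p2):
        # Each plane is given by a unit normal n and a point p on it.
        # The intersection line's direction is the cross product of the
        # normals; a point on the line solves the stacked 3x3 system.
        d = np.cross(n1, n2)
        A = np.stack([n1, n2, d])
        b = np.array([n1 @ p1, n2 @ p2, 0.0])
        point = np.linalg.solve(A, b)  # valid when planes are not parallel
        return point, d / np.linalg.norm(d)

    n1, p1 = np.array([0.0, 0.0, 1.0]), np.zeros(3)  # axial plane z = 0
    n2, p2 = np.array([1.0, 0.0, 0.0]), np.zeros(3)  # sagittal plane x = 0
    point, direction = plane_intersection(n1, p1, n2, p2)  # the y-axis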

arXiv:2403.16001 (https://arxiv.org/abs/2403.16001) [pdf, other] cs.SE (Software Engineering)
Fine-Grained Assertion-Based Test Selection
Authors: Sijia Gu, Ali Mesbah
Abstract: For large software applications, running the whole test suite after each code change is time- and resource-intensive. Regression test selection techniques aim at reducing test execution time by selecting only the tests that are affected by code changes. However, existing techniques select test entities at coarse granularity levels, such as test class, which causes imprecise test selection and the execution of unaffected tests. We propose a novel approach that increases selection precision by analyzing test code at the statement level and treating test assertions as the unit of selection. We implement our fine-grained test selection approach in a tool called SELERTION and evaluate it by comparing it against two state-of-the-art test selection techniques on 11 open-source subjects. Our results show that SELERTION increases selection precision for all the subjects. Our test selection reduces overall test time by 63% on average, making regression testing up to 23% faster than the other techniques. Our results also indicate that subjects with longer test execution times benefit more from our fine-grained selection technique.
Submitted 24 March, 2024; originally announced March 2024.
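
Assertion-granular selection can be pictured as intersecting per-assertion statement coverage with the set of changed statements. The sketch below is a minimal caricature of that idea; SELERTION's actual analysis is more involved.

    def select_assertions(assertion_coverage, changed_statements):
        # assertion_coverage: {assertion_id: set of statements it exercises}
        # Keep only assertions that touch at least one changed statement,
        # so unaffected assertions (and their setup cost) are skipped.
        return {a for a, stmts in assertion_coverage.items()
                if stmts & changed_statements}

    coverage = {"testFoo:assert1": {"Foo.java:10", "Foo.java:12"},
                "testFoo:assert2": {"Bar.java:7"}}
    print(select_assertions(coverage, {"Foo.java:12"}))  # {'testFoo:assert1'}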

arXiv:2403.14623 (https://arxiv.org/abs/2403.14623) [pdf, other] cs.LG (Machine Learning); cs.CV (Computer Vision and Pattern Recognition)
Simplified Diffusion Schrödinger Bridge
Authors: Zhicong Tang, Tiankai Hang, Shuyang Gu, Dong Chen, Baining Guo
Abstract: This paper introduces a novel theoretical simplification of the Diffusion Schrödinger Bridge (DSB) that facilitates its unification with Score-based Generative Models (SGMs), addressing the limitations of DSB in complex data generation and enabling faster convergence and enhanced performance. By employing SGMs as an initial solution for DSB, our approach capitalizes on the strengths of both frameworks, ensuring a more efficient training process and improving the performance of SGMs. We also propose a reparameterization technique that, despite theoretical approximations, practically improves the network's fitting capabilities. Our extensive experimental evaluations confirm the effectiveness of the simplified DSB, demonstrating significant improvements. We believe the contributions of this work pave the way for advanced generative modeling.
Submitted 28 October, 2024; v1 submitted 21 March, 2024; originally announced March 2024.
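
For background, the Schrödinger bridge problem is commonly posed as finding the path measure closest to a reference diffusion while matching fixed endpoint marginals (the notation here is the standard one and may differ from the paper's):

$$\min_{Q} \ \mathrm{KL}\left(Q \,\|\, P_{\mathrm{ref}}\right) \quad \text{s.t.} \quad Q_0 = p_{\mathrm{data}}, \quad Q_T = p_{\mathrm{prior}}.$$

Classic DSB schemes solve this by alternating half-bridge projections (iterative proportional fitting); per the abstract, the simplification here starts that process from a pretrained SGM rather than from scratch.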