CINXE.COM

Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 6,723 results for author: <span class="mathjax">Li, J</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Li%2C+J">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Li, J"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Li%2C+J&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Li, J"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Li%2C+J&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Li%2C+J&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Li%2C+J&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Li%2C+J&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Li%2C+J&amp;start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Li%2C+J&amp;start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">&hellip;</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.10248">arXiv:2502.10248</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.10248">pdf</a>, <a href="https://arxiv.org/format/2502.10248">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Step-Video-T2V Technical Report: The Practice, Challenges, and Future of Video Foundation Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ma%2C+G">Guoqing Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+H">Haoyang Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Yan%2C+K">Kun Yan</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+L">Liangyu Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Duan%2C+N">Nan Duan</a>, <a href="/search/cs?searchtype=author&amp;query=Yin%2C+S">Shengming Yin</a>, <a href="/search/cs?searchtype=author&amp;query=Wan%2C+C">Changyi Wan</a>, <a href="/search/cs?searchtype=author&amp;query=Ming%2C+R">Ranchen Ming</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+X">Xiaoniu Song</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+X">Xing Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+Y">Yu Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+D">Deshan Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+D">Deyu Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+J">Jian Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Tan%2C+K">Kaijun Tan</a>, <a href="/search/cs?searchtype=author&amp;query=An%2C+K">Kang An</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+M">Mei Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Ji%2C+W">Wei Ji</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Q">Qiling Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+W">Wen Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Han%2C+X">Xin Han</a>, <a href="/search/cs?searchtype=author&amp;query=Wei%2C+Y">Yanan Wei</a>, <a href="/search/cs?searchtype=author&amp;query=Ge%2C+Z">Zheng Ge</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+A">Aojie Li</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+B">Bin Wang</a> , et al. (90 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.10248v1-abstract-short" style="display: inline;"> We present Step-Video-T2V, a state-of-the-art text-to-video pre-trained model with 30B parameters and the ability to generate videos up to 204 frames in length. A deep compression Variational Autoencoder, Video-VAE, is designed for video generation tasks, achieving 16x16 spatial and 8x temporal compression ratios, while maintaining exceptional video reconstruction quality. User prompts are encoded&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10248v1-abstract-full').style.display = 'inline'; document.getElementById('2502.10248v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.10248v1-abstract-full" style="display: none;"> We present Step-Video-T2V, a state-of-the-art text-to-video pre-trained model with 30B parameters and the ability to generate videos up to 204 frames in length. A deep compression Variational Autoencoder, Video-VAE, is designed for video generation tasks, achieving 16x16 spatial and 8x temporal compression ratios, while maintaining exceptional video reconstruction quality. User prompts are encoded using two bilingual text encoders to handle both English and Chinese. A DiT with 3D full attention is trained using Flow Matching and is employed to denoise input noise into latent frames. A video-based DPO approach, Video-DPO, is applied to reduce artifacts and improve the visual quality of the generated videos. We also detail our training strategies and share key observations and insights. Step-Video-T2V&#39;s performance is evaluated on a novel video generation benchmark, Step-Video-T2V-Eval, demonstrating its state-of-the-art text-to-video quality when compared with both open-source and commercial engines. Additionally, we discuss the limitations of current diffusion-based model paradigm and outline future directions for video foundation models. We make both Step-Video-T2V and Step-Video-T2V-Eval available at https://github.com/stepfun-ai/Step-Video-T2V. The online version can be accessed from https://yuewen.cn/videos as well. Our goal is to accelerate the innovation of video foundation models and empower video content creators. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10248v1-abstract-full').style.display = 'none'; document.getElementById('2502.10248v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">35 pages, 14 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09973">arXiv:2502.09973</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.09973">pdf</a>, <a href="https://arxiv.org/format/2502.09973">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3706598.3713882">10.1145/3706598.3713882 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> InteRecon: Towards Reconstructing Interactivity of Personal Memorable Items in Mixed Reality </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+Z">Zisu Li</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jiawei Li</a>, <a href="/search/cs?searchtype=author&amp;query=Xiong%2C+Z">Zeyu Xiong</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+S">Shumeng Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Faruqi%2C+F">Faraz Faruqi</a>, <a href="/search/cs?searchtype=author&amp;query=Mueller%2C+S">Stefanie Mueller</a>, <a href="/search/cs?searchtype=author&amp;query=Liang%2C+C">Chen Liang</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+X">Xiaojuan Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Fan%2C+M">Mingming Fan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09973v1-abstract-short" style="display: inline;"> Digital capturing of memorable personal items is a key way to archive personal memories. Although current digitization methods (e.g., photos, videos, 3D scanning) can replicate the physical appearance of an item, they often cannot preserve its real-world interactivity. We present Interactive Digital Item (IDI), a concept of reconstructing both the physical appearance and, more importantly, the int&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09973v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09973v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09973v1-abstract-full" style="display: none;"> Digital capturing of memorable personal items is a key way to archive personal memories. Although current digitization methods (e.g., photos, videos, 3D scanning) can replicate the physical appearance of an item, they often cannot preserve its real-world interactivity. We present Interactive Digital Item (IDI), a concept of reconstructing both the physical appearance and, more importantly, the interactivity of an item. We first conducted a formative study to understand users&#39; expectations of IDI, identifying key physical interactivity features, including geometry, interfaces, and embedded content of items. Informed by these findings, we developed InteRecon, an AR prototype enabling personal reconstruction functions for IDI creation. An exploratory study was conducted to assess the feasibility of using InteRecon and explore the potential of IDI to enrich personal memory archives. Results show that InteRecon is feasible for IDI creation, and the concept of IDI brings new opportunities for augmenting personal memory archives. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09973v1-abstract-full').style.display = 'none'; document.getElementById('2502.09973v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">19 pages, 8 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09967">arXiv:2502.09967</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.09967">pdf</a>, <a href="https://arxiv.org/format/2502.09967">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> VicKAM: Visual Conceptual Knowledge Guided Action Map for Weakly Supervised Group Activity Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Z">Zhuming Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+Y">Yihao Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jiarui Li</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Y">Yaofei Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+Y">Yan Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Z">Zun Li</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+L">Lifang Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+L">Liang Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09967v1-abstract-short" style="display: inline;"> Existing weakly supervised group activity recognition methods rely on object detectors or attention mechanisms to capture key areas automatically. However, they overlook the semantic information associated with captured areas, which may adversely affect the recognition performance. In this paper, we propose a novel framework named Visual Conceptual Knowledge Guided Action Map (VicKAM) which effect&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09967v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09967v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09967v1-abstract-full" style="display: none;"> Existing weakly supervised group activity recognition methods rely on object detectors or attention mechanisms to capture key areas automatically. However, they overlook the semantic information associated with captured areas, which may adversely affect the recognition performance. In this paper, we propose a novel framework named Visual Conceptual Knowledge Guided Action Map (VicKAM) which effectively captures the locations of individual actions and integrates them with action semantics for weakly supervised group activity recognition.It generates individual action prototypes from training set as visual conceptual knowledge to bridge action semantics and visual representations. Guided by this knowledge, VicKAM produces action maps that indicate the likelihood of each action occurring at various locations, based on image correlation theorem. It further augments individual action maps using group activity related statistical information, representing individual action distribution under different group activities, to establish connections between action maps and specific group activities. The augmented action map is incorporated with action semantic representations for group activity recognition.Extensive experiments on two public benchmarks, the Volleyball and the NBA datasets, demonstrate the effectiveness of our proposed method, even in cases of limited training data. The code will be released later. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09967v1-abstract-full').style.display = 'none'; document.getElementById('2502.09967v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09577">arXiv:2502.09577</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.09577">pdf</a>, <a href="https://arxiv.org/format/2502.09577">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Polymind: Parallel Visual Diagramming with Large Language Models to Support Prewriting Through Microtasks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wan%2C+Q">Qian Wan</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jiannan Li</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+H">Huanchen Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+Z">Zhicong Lu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09577v1-abstract-short" style="display: inline;"> Prewriting is the process of generating and organising ideas before a first draft. It consists of a combination of informal, iterative, and semi-structured strategies such as visual diagramming, which poses a challenge for collaborating with large language models (LLMs) in a turn-taking conversational manner. We present Polymind, a visual diagramming tool that leverages multiple LLM-powered agents&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09577v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09577v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09577v1-abstract-full" style="display: none;"> Prewriting is the process of generating and organising ideas before a first draft. It consists of a combination of informal, iterative, and semi-structured strategies such as visual diagramming, which poses a challenge for collaborating with large language models (LLMs) in a turn-taking conversational manner. We present Polymind, a visual diagramming tool that leverages multiple LLM-powered agents to support prewriting. The system features a parallel collaboration workflow in place of the turn-taking conversational interactions. It defines multiple ``microtasks&#39;&#39; to simulate group collaboration scenarios such as collaborative writing and group brainstorming. Instead of repetitively prompting a chatbot for various purposes, Polymind enables users to orchestrate multiple microtasks simultaneously. Users can configure and delegate customised microtasks, and manage their microtasks by specifying task requirements and toggling visibility and initiative. Our evaluation revealed that, compared to ChatGPT, users had more customizability over collaboration with Polymind, and were thus able to quickly expand personalised writing ideas during prewriting. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09577v1-abstract-full').style.display = 'none'; document.getElementById('2502.09577v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to CSCW 2025 with minor revisions</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09156">arXiv:2502.09156</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.09156">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Improving TCM Question Answering through Tree-Organized Self-Reflective Retrieval with LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Liu%2C+C">Chang Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Chang%2C+Y">Ying Chang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jianmin Li</a>, <a href="/search/cs?searchtype=author&amp;query=Qu%2C+Y">Yiqian Qu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Y">Yu Li</a>, <a href="/search/cs?searchtype=author&amp;query=Cao%2C+L">Lingyong Cao</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+S">Shuyuan Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09156v1-abstract-short" style="display: inline;"> Objectives: Large language models (LLMs) can harness medical knowledge for intelligent question answering (Q&amp;A), promising support for auxiliary diagnosis and medical talent cultivation. However, there is a deficiency of highly efficient retrieval-augmented generation (RAG) frameworks within the domain of Traditional Chinese Medicine (TCM). Our purpose is to observe the effect of the Tree-Organize&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09156v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09156v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09156v1-abstract-full" style="display: none;"> Objectives: Large language models (LLMs) can harness medical knowledge for intelligent question answering (Q&amp;A), promising support for auxiliary diagnosis and medical talent cultivation. However, there is a deficiency of highly efficient retrieval-augmented generation (RAG) frameworks within the domain of Traditional Chinese Medicine (TCM). Our purpose is to observe the effect of the Tree-Organized Self-Reflective Retrieval (TOSRR) framework on LLMs in TCM Q&amp;A tasks. Materials and Methods: We introduce the novel approach of knowledge organization, constructing a tree structure knowledge base with hierarchy. At inference time, our self-reflection framework retrieves from this knowledge base, integrating information across chapters. Questions from the TCM Medical Licensing Examination (MLE) and the college Classics Course Exam (CCE) were randomly selected as benchmark datasets. Results: By coupling with GPT-4, the framework can improve the best performance on the TCM MLE benchmark by 19.85% in absolute accuracy, and improve recall accuracy from 27% to 38% on CCE datasets. In manual evaluation, the framework improves a total of 18.52 points across dimensions of safety, consistency, explainability, compliance, and coherence. Conclusion: The TOSRR framework can effectively improve LLM&#39;s capability in Q&amp;A tasks of TCM. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09156v1-abstract-full').style.display = 'none'; document.getElementById('2502.09156v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09125">arXiv:2502.09125</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.09125">pdf</a>, <a href="https://arxiv.org/format/2502.09125">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Automatic Pruning via Structured Lasso with Class-wise Information </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Liu%2C+X">Xiang Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+M">Mingchen Li</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+X">Xia Li</a>, <a href="/search/cs?searchtype=author&amp;query=Qu%2C+L">Leigang Qu</a>, <a href="/search/cs?searchtype=author&amp;query=Peng%2C+Z">Zifan Peng</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+Y">Yijun Song</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Z">Zemin Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+L">Linshan Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jialin Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09125v1-abstract-short" style="display: inline;"> Most pruning methods concentrate on unimportant filters of neural networks. However, they face the loss of statistical information due to a lack of consideration for class-wise data. In this paper, from the perspective of leveraging precise class-wise information for model pruning, we utilize structured lasso with guidance from Information Bottleneck theory. Our approach ensures that statistical i&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09125v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09125v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09125v1-abstract-full" style="display: none;"> Most pruning methods concentrate on unimportant filters of neural networks. However, they face the loss of statistical information due to a lack of consideration for class-wise data. In this paper, from the perspective of leveraging precise class-wise information for model pruning, we utilize structured lasso with guidance from Information Bottleneck theory. Our approach ensures that statistical information is retained during the pruning process. With these techniques, we introduce two innovative adaptive network pruning schemes: sparse graph-structured lasso pruning with Information Bottleneck (\textbf{sGLP-IB}) and sparse tree-guided lasso pruning with Information Bottleneck (\textbf{sTLP-IB}). The key aspect is pruning model filters using sGLP-IB and sTLP-IB to better capture class-wise relatedness. Compared to multiple state-of-the-art methods, our approaches demonstrate superior performance across three datasets and six model architectures in extensive experiments. For instance, using the VGG16 model on the CIFAR-10 dataset, we achieve a parameter reduction of 85%, a decrease in FLOPs by 61%, and maintain an accuracy of 94.10% (0.14% higher than the original model); we reduce the parameters by 55% with the accuracy at 76.12% using the ResNet architecture on ImageNet (only drops 0.03%). In summary, we successfully reduce model size and computational resource usage while maintaining accuracy. Our codes are at https://anonymous.4open.science/r/IJCAI-8104. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09125v1-abstract-full').style.display = 'none'; document.getElementById('2502.09125v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 2 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09104">arXiv:2502.09104</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.09104">pdf</a>, <a href="https://arxiv.org/ps/2502.09104">ps</a>, <a href="https://arxiv.org/format/2502.09104">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> One-shot Federated Learning Methods: A Practical Guide </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Liu%2C+X">Xiang Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Tang%2C+Z">Zhenheng Tang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+X">Xia Li</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+Y">Yijun Song</a>, <a href="/search/cs?searchtype=author&amp;query=Ji%2C+S">Sijie Ji</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Z">Zemin Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Han%2C+B">Bo Han</a>, <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+L">Linshan Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jialin Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09104v1-abstract-short" style="display: inline;"> One-shot Federated Learning (OFL) is a distributed machine learning paradigm that constrains client-server communication to a single round, addressing privacy and communication overhead issues associated with multiple rounds of data exchange in traditional Federated Learning (FL). OFL demonstrates the practical potential for integration with future approaches that require collaborative training mo&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09104v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09104v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09104v1-abstract-full" style="display: none;"> One-shot Federated Learning (OFL) is a distributed machine learning paradigm that constrains client-server communication to a single round, addressing privacy and communication overhead issues associated with multiple rounds of data exchange in traditional Federated Learning (FL). OFL demonstrates the practical potential for integration with future approaches that require collaborative training models, such as large language models (LLMs). However, current OFL methods face two major challenges: data heterogeneity and model heterogeneity, which result in subpar performance compared to conventional FL methods. Worse still, despite numerous studies addressing these limitations, a comprehensive summary is still lacking. To address these gaps, this paper presents a systematic analysis of the challenges faced by OFL and thoroughly reviews the current methods. We also offer an innovative categorization method and analyze the trade-offs of various techniques. Additionally, we discuss the most promising future directions and the technologies that should be integrated into the OFL field. This work aims to provide guidance and insights for future research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09104v1-abstract-full').style.display = 'none'; document.getElementById('2502.09104v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 1 figure</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09101">arXiv:2502.09101</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.09101">pdf</a>, <a href="https://arxiv.org/format/2502.09101">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Bridging the Gap Between LLMs and Human Intentions: Progresses and Challenges in Instruction Understanding, Intention Reasoning, and Reliable Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chang%2C+Z">Zongyu Chang</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+F">Feihong Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+Z">Ziqin Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Q">Qian Li</a>, <a href="/search/cs?searchtype=author&amp;query=Ji%2C+C">Cheng Ji</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Z">Zhuo Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Y">Yang Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+R">Ruifeng Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+Y">Yangqiu Song</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+S">Shangguang Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jianxin Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09101v1-abstract-short" style="display: inline;"> Large language models (LLMs) have demonstrated exceptional capabilities in understanding and generation. However, when interacting with human instructions in real-world scenarios, LLMs still face significant challenges, particularly in accurately capturing and comprehending human instructions and intentions. This paper focuses on three challenges in LLM-based text generation tasks: instruction und&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09101v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09101v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09101v1-abstract-full" style="display: none;"> Large language models (LLMs) have demonstrated exceptional capabilities in understanding and generation. However, when interacting with human instructions in real-world scenarios, LLMs still face significant challenges, particularly in accurately capturing and comprehending human instructions and intentions. This paper focuses on three challenges in LLM-based text generation tasks: instruction understanding, intention reasoning, and reliable generation. Regarding human complex instruction, LLMs have deficiencies in understanding long contexts and instructions in multi-round conversations. For intention reasoning, LLMs may have inconsistent command reasoning, difficulty reasoning about commands containing incorrect information, difficulty understanding user ambiguous language commands, and a weak understanding of user intention in commands. Besides, In terms of reliable generation, LLMs may have unstable generated content and unethical generation. To this end, we classify and analyze the performance of LLMs in challenging scenarios and conduct a comprehensive evaluation of existing solutions. Furthermore, we introduce benchmarks and categorize them based on the aforementioned three core challenges. Finally, we explore potential directions for future research to enhance the reliability and adaptability of LLMs in real-world applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09101v1-abstract-full').style.display = 'none'; document.getElementById('2502.09101v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09038">arXiv:2502.09038</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.09038">pdf</a>, <a href="https://arxiv.org/format/2502.09038">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> AoI-Sensitive Data Forwarding with Distributed Beamforming in UAV-Assisted IoT </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lang%2C+Z">Zifan Lang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+G">Guixia Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+G">Geng Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jiahui Li</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+Z">Zemin Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+J">Jiacheng Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Leung%2C+V+C+M">Victor C. M. Leung</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09038v1-abstract-short" style="display: inline;"> This paper proposes a UAV-assisted forwarding system based on distributed beamforming to enhance age of information (AoI) in Internet of Things (IoT). Specifically, UAVs collect and relay data between sensor nodes (SNs) and the remote base station (BS). However, flight delays increase the AoI and degrade the network performance. To mitigate this, we adopt distributed beamforming to extend the comm&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09038v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09038v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09038v1-abstract-full" style="display: none;"> This paper proposes a UAV-assisted forwarding system based on distributed beamforming to enhance age of information (AoI) in Internet of Things (IoT). Specifically, UAVs collect and relay data between sensor nodes (SNs) and the remote base station (BS). However, flight delays increase the AoI and degrade the network performance. To mitigate this, we adopt distributed beamforming to extend the communication range, reduce the flight frequency and ensure the continuous data relay and efficient energy utilization. Then, we formulate an optimization problem to minimize AoI and UAV energy consumption, by jointly optimizing the UAV trajectories and communication schedules. The problem is non-convex and with high dynamic, and thus we propose a deep reinforcement learning (DRL)-based algorithm to solve the problem, thereby enhancing the stability and accelerate convergence speed. Simulation results show that the proposed algorithm effectively addresses the problem and outperforms other benchmark algorithms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09038v1-abstract-full').style.display = 'none'; document.getElementById('2502.09038v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 4 figures, ICC2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08946">arXiv:2502.08946</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.08946">pdf</a>, <a href="https://arxiv.org/format/2502.08946">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> The Stochastic Parrot on LLM&#39;s Shoulder: A Summative Assessment of Physical Concept Understanding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yu%2C+M">Mo Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+L">Lemao Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+J">Junjie Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Chung%2C+T+T">Tsz Ting Chung</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+S">Shunchi Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jiangnan Li</a>, <a href="/search/cs?searchtype=author&amp;query=Yeung%2C+D">Dit-Yan Yeung</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+J">Jie Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.08946v1-abstract-short" style="display: inline;"> In a systematic way, we investigate a widely asked question: Do LLMs really understand what they say?, which relates to the more familiar term Stochastic Parrot. To this end, we propose a summative assessment over a carefully designed physical concept understanding task, PhysiCo. Our task alleviates the memorization issue via the usage of grid-format inputs that abstractly describe physical phenom&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08946v1-abstract-full').style.display = 'inline'; document.getElementById('2502.08946v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.08946v1-abstract-full" style="display: none;"> In a systematic way, we investigate a widely asked question: Do LLMs really understand what they say?, which relates to the more familiar term Stochastic Parrot. To this end, we propose a summative assessment over a carefully designed physical concept understanding task, PhysiCo. Our task alleviates the memorization issue via the usage of grid-format inputs that abstractly describe physical phenomena. The grids represents varying levels of understanding, from the core phenomenon, application examples to analogies to other abstract patterns in the grid world. A comprehensive study on our task demonstrates: (1) state-of-the-art LLMs, including GPT-4o, o1 and Gemini 2.0 flash thinking, lag behind humans by ~40%; (2) the stochastic parrot phenomenon is present in LLMs, as they fail on our grid task but can describe and recognize the same concepts well in natural language; (3) our task challenges the LLMs due to intrinsic difficulties rather than the unfamiliar grid format, as in-context learning and fine-tuning on same formatted data added little to their performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08946v1-abstract-full').style.display = 'none'; document.getElementById('2502.08946v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NAACL 2025 Main Conference. First 5 authors contributed equally. Project page: https://physico-benchmark.github.io/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08940">arXiv:2502.08940</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.08940">pdf</a>, <a href="https://arxiv.org/format/2502.08940">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Towards Understanding Why Data Augmentation Improves Generalization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jingyang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Pan%2C+J">Jiachun Pan</a>, <a href="/search/cs?searchtype=author&amp;query=Toh%2C+K">Kim-Chuan Toh</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+P">Pan Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.08940v1-abstract-short" style="display: inline;"> Data augmentation is a cornerstone technique in deep learning, widely used to improve model generalization. Traditional methods like random cropping and color jittering, as well as advanced techniques such as CutOut, Mixup, and CutMix, have achieved notable success across various domains. However, the mechanisms by which data augmentation improves generalization remain poorly understood, and exist&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08940v1-abstract-full').style.display = 'inline'; document.getElementById('2502.08940v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.08940v1-abstract-full" style="display: none;"> Data augmentation is a cornerstone technique in deep learning, widely used to improve model generalization. Traditional methods like random cropping and color jittering, as well as advanced techniques such as CutOut, Mixup, and CutMix, have achieved notable success across various domains. However, the mechanisms by which data augmentation improves generalization remain poorly understood, and existing theoretical analyses typically focus on individual techniques without a unified explanation. In this work, we present a unified theoretical framework that elucidates how data augmentation enhances generalization through two key effects: partial semantic feature removal and feature mixing. Partial semantic feature removal reduces the model&#39;s reliance on individual feature, promoting diverse feature learning and better generalization. Feature mixing, by scaling down original semantic features and introducing noise, increases training complexity, driving the model to develop more robust features. Advanced methods like CutMix integrate both effects, achieving complementary benefits. Our theoretical insights are further supported by experimental results, validating the effectiveness of this unified perspective. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08940v1-abstract-full').style.display = 'none'; document.getElementById('2502.08940v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08893">arXiv:2502.08893</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.08893">pdf</a>, <a href="https://arxiv.org/format/2502.08893">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Uncovering Disparities in Rideshare Drivers Earning and Work Patterns: A Case Study of Chicago </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Dang%2C+H">Hy Dang</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+Y">Yuwen Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Spicer%2C+J">Jason Spicer</a>, <a href="/search/cs?searchtype=author&amp;query=Kay%2C+T">Tamara Kay</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+D">Di Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+Y">Yang Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Brockman%2C+J">Jay Brockman</a>, <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+M">Meng Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+T+J">Toby Jia-Jun Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.08893v1-abstract-short" style="display: inline;"> Ride-sharing services are revolutionizing urban mobility while simultaneously raising significant concerns regarding fairness and driver equity. This study employs Chicago Trip Network Provider dataset to investigate disparities in ride-sharing earnings between 2018 and 2023. Our analysis reveals marked temporal shifts, including an earnings surge in early 2021 followed by fluctuations and a decli&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08893v1-abstract-full').style.display = 'inline'; document.getElementById('2502.08893v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.08893v1-abstract-full" style="display: none;"> Ride-sharing services are revolutionizing urban mobility while simultaneously raising significant concerns regarding fairness and driver equity. This study employs Chicago Trip Network Provider dataset to investigate disparities in ride-sharing earnings between 2018 and 2023. Our analysis reveals marked temporal shifts, including an earnings surge in early 2021 followed by fluctuations and a decline in inflation-adjusted income, as well as pronounced spatial disparities, with drivers in Central and airport regions earning substantially more than those in peripheral areas. Recognizing the limitations of trip-level data, we introduce a novel trip-driver assignment algorithm to reconstruct plausible daily work patterns, uncovering distinct driver clusters with varied earning profiles. Notably, drivers operating during late-evening and overnight hours secure higher per-trip and hourly rates, while emerging groups in low-demand regions face significant earnings deficits. Our findings call for more transparent pricing models and a re-examination of platform design to promote equitable driver outcomes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08893v1-abstract-full').style.display = 'none'; document.getElementById('2502.08893v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08882">arXiv:2502.08882</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.08882">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> 2D Integrated Bayesian Tomography of Plasma Electron Density Profile for HL-3 Based on Gaussian Process </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+C">Cong Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+R">Renjie Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dong Li</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+Z">Zongyu Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Z">Zhijun Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Wei%2C+Y">Yixiong Wei</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jing Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.08882v1-abstract-short" style="display: inline;"> This paper introduces an integrated Bayesian model that combines line integral measurements and point values using Gaussian Process (GP). The proposed method leverages Gaussian Process Regression (GPR) to incorporate point values into 2D profiles and employs coordinate mapping to integrate magnetic flux information for 2D inversion. The average relative error of the reconstructed profile, using th&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08882v1-abstract-full').style.display = 'inline'; document.getElementById('2502.08882v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.08882v1-abstract-full" style="display: none;"> This paper introduces an integrated Bayesian model that combines line integral measurements and point values using Gaussian Process (GP). The proposed method leverages Gaussian Process Regression (GPR) to incorporate point values into 2D profiles and employs coordinate mapping to integrate magnetic flux information for 2D inversion. The average relative error of the reconstructed profile, using the integrated Bayesian tomography model with normalized magnetic flux, is as low as 3.60*10^(-4). Additionally, sensitivity tests were conducted on the number of grids, the standard deviation of synthetic diagnostic data, and noise levels, laying a solid foundation for the application of the model to experimental data. This work not only achieves accurate 2D inversion using the integrated Bayesian model but also provides a robust framework for decoupling pressure information from equilibrium reconstruction, thus making it possible to optimize equilibrium reconstruction using inversion results. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08882v1-abstract-full').style.display = 'none'; document.getElementById('2502.08882v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08836">arXiv:2502.08836</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.08836">pdf</a>, <a href="https://arxiv.org/ps/2502.08836">ps</a>, <a href="https://arxiv.org/format/2502.08836">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Survey on Single-Image Reflection Removal using Deep Learning Techniques </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yang%2C+K">Kangning Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+H">Huiming Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Cai%2C+J">Jie Cai</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+L">Lan Fu</a>, <a href="/search/cs?searchtype=author&amp;query=Ding%2C+J">Jiaming Ding</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jinlong Li</a>, <a href="/search/cs?searchtype=author&amp;query=Ho%2C+C+M">Chiu Man Ho</a>, <a href="/search/cs?searchtype=author&amp;query=Meng%2C+Z">Zibo Meng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.08836v1-abstract-short" style="display: inline;"> The phenomenon of reflection is quite common in digital images, posing significant challenges for various applications such as computer vision, photography, and image processing. Traditional methods for reflection removal often struggle to achieve clean results while maintaining high fidelity and robustness, particularly in real-world scenarios. Over the past few decades, numerous deep learning-ba&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08836v1-abstract-full').style.display = 'inline'; document.getElementById('2502.08836v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.08836v1-abstract-full" style="display: none;"> The phenomenon of reflection is quite common in digital images, posing significant challenges for various applications such as computer vision, photography, and image processing. Traditional methods for reflection removal often struggle to achieve clean results while maintaining high fidelity and robustness, particularly in real-world scenarios. Over the past few decades, numerous deep learning-based approaches for reflection removal have emerged, yielding impressive results. In this survey, we conduct a comprehensive review of the current literature by focusing on key venues such as ICCV, ECCV, CVPR, NeurIPS, etc., as these conferences and journals have been central to advances in the field. Our review follows a structured paper selection process, and we critically assess both single-stage and two-stage deep learning methods for reflection removal. The contribution of this survey is three-fold: first, we provide a comprehensive summary of the most recent work on single-image reflection removal; second, we outline task hypotheses, current deep learning techniques, publicly available datasets, and relevant evaluation metrics; and third, we identify key challenges and opportunities in deep learning-based reflection removal, highlighting the potential of this rapidly evolving research area. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08836v1-abstract-full').style.display = 'none'; document.getElementById('2502.08836v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08660">arXiv:2502.08660</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.08660">pdf</a>, <a href="https://arxiv.org/format/2502.08660">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Semantic Role Labeling: A Systematical Survey </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+H">Huiyao Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+M">Meishan Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jing Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+M">Min Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=%C3%98vrelid%2C+L">Lilja 脴vrelid</a>, <a href="/search/cs?searchtype=author&amp;query=Haji%C4%8D%2C+J">Jan Haji膷</a>, <a href="/search/cs?searchtype=author&amp;query=Fei%2C+H">Hao Fei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.08660v1-abstract-short" style="display: inline;"> Semantic role labeling (SRL) is a central natural language processing (NLP) task aiming to understand the semantic roles within texts, facilitating a wide range of downstream applications. While SRL has garnered extensive and enduring research, there is currently a lack of a comprehensive survey that thoroughly organizes and synthesizes the field. This paper aims to review the entire research traj&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08660v1-abstract-full').style.display = 'inline'; document.getElementById('2502.08660v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.08660v1-abstract-full" style="display: none;"> Semantic role labeling (SRL) is a central natural language processing (NLP) task aiming to understand the semantic roles within texts, facilitating a wide range of downstream applications. While SRL has garnered extensive and enduring research, there is currently a lack of a comprehensive survey that thoroughly organizes and synthesizes the field. This paper aims to review the entire research trajectory of the SRL community over the past two decades. We begin by providing a complete definition of SRL. To offer a comprehensive taxonomy, we categorize SRL methodologies into four key perspectives: model architectures, syntax feature modeling, application scenarios, and multi-modal extensions. Further, we discuss SRL benchmarks, evaluation metrics, and paradigm modeling approaches, while also exploring practical applications across various domains. Finally, we analyze future research directions in SRL, addressing the evolving role of SRL in the age of large language models (LLMs) and its potential impact on the broader NLP landscape. We maintain a public repository and consistently update related resources at: https://github.com/DreamH1gh/Awesome-SRL <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08660v1-abstract-full').style.display = 'none'; document.getElementById('2502.08660v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08590">arXiv:2502.08590</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.08590">pdf</a>, <a href="https://arxiv.org/format/2502.08590">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Light-A-Video: Training-free Video Relighting via Progressive Light Fusion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+Y">Yujie Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Bu%2C+J">Jiazi Bu</a>, <a href="/search/cs?searchtype=author&amp;query=Ling%2C+P">Pengyang Ling</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+P">Pan Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+T">Tong Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+Q">Qidong Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jinsong Li</a>, <a href="/search/cs?searchtype=author&amp;query=Dong%2C+X">Xiaoyi Dong</a>, <a href="/search/cs?searchtype=author&amp;query=Zang%2C+Y">Yuhang Zang</a>, <a href="/search/cs?searchtype=author&amp;query=Cao%2C+Y">Yuhang Cao</a>, <a href="/search/cs?searchtype=author&amp;query=Rao%2C+A">Anyi Rao</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+J">Jiaqi Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Niu%2C+L">Li Niu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.08590v1-abstract-short" style="display: inline;"> Recent advancements in image relighting models, driven by large-scale datasets and pre-trained diffusion models, have enabled the imposition of consistent lighting. However, video relighting still lags, primarily due to the excessive training costs and the scarcity of diverse, high-quality video relighting datasets. A simple application of image relighting models on a frame-by-frame basis leads to&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08590v1-abstract-full').style.display = 'inline'; document.getElementById('2502.08590v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.08590v1-abstract-full" style="display: none;"> Recent advancements in image relighting models, driven by large-scale datasets and pre-trained diffusion models, have enabled the imposition of consistent lighting. However, video relighting still lags, primarily due to the excessive training costs and the scarcity of diverse, high-quality video relighting datasets. A simple application of image relighting models on a frame-by-frame basis leads to several issues: lighting source inconsistency and relighted appearance inconsistency, resulting in flickers in the generated videos. In this work, we propose Light-A-Video, a training-free approach to achieve temporally smooth video relighting. Adapted from image relighting models, Light-A-Video introduces two key techniques to enhance lighting consistency. First, we design a Consistent Light Attention (CLA) module, which enhances cross-frame interactions within the self-attention layers to stabilize the generation of the background lighting source. Second, leveraging the physical principle of light transport independence, we apply linear blending between the source video&#39;s appearance and the relighted appearance, using a Progressive Light Fusion (PLF) strategy to ensure smooth temporal transitions in illumination. Experiments show that Light-A-Video improves the temporal consistency of relighted video while maintaining the image quality, ensuring coherent lighting transitions across frames. Project page: https://bujiazi.github.io/light-a-video.github.io/. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08590v1-abstract-full').style.display = 'none'; document.getElementById('2502.08590v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project Page: https://bujiazi.github.io/light-a-video.github.io/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08525">arXiv:2502.08525</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.08525">pdf</a>, <a href="https://arxiv.org/format/2502.08525">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> </div> </div> <p class="title is-5 mathjax"> Checkerboard Target Measurement in Unordered Point Clouds with Coloured ICP </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Goo%2C+J+M">June Moh Goo</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jialun Li</a>, <a href="/search/cs?searchtype=author&amp;query=Wicaksono%2C+D">Darmawan Wicaksono</a>, <a href="/search/cs?searchtype=author&amp;query=Boehm%2C+J">Jan Boehm</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.08525v1-abstract-short" style="display: inline;"> In this work, we investigate the problem of measuring a the centre checkerboard target in an 3D point cloud. This is an important problem which has applications in registration, long term monitoring and linking to other sensor systems. We use a 3D template matching approach based on the coloured ICP algorithm to solve the problem. We tackle the problem under the additional constraints that we assu&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08525v1-abstract-full').style.display = 'inline'; document.getElementById('2502.08525v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.08525v1-abstract-full" style="display: none;"> In this work, we investigate the problem of measuring a the centre checkerboard target in an 3D point cloud. This is an important problem which has applications in registration, long term monitoring and linking to other sensor systems. We use a 3D template matching approach based on the coloured ICP algorithm to solve the problem. We tackle the problem under the additional constraints that we assume no structure in the 3D data in order to be able to handle unordered point clouds. This gives us the capability to process data from the new generation of low-cost LIDAR sensors. This category of sensors also suffers from increased noise in range and reflectivity measurement. We provide extensive simulation results using synthetic data to capture the potential of the approach. We then give the detailed steps for handling real sensor data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08525v1-abstract-full').style.display = 'none'; document.getElementById('2502.08525v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08512">arXiv:2502.08512</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.08512">pdf</a>, <a href="https://arxiv.org/format/2502.08512">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Measuring Diversity in Synthetic Datasets </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+Y">Yuchang Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+H">Huizhe Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+B">Bingzhe Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jintang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+Z">Zibin Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+P">Peilin Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+L">Liang Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Bian%2C+Y">Yatao Bian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.08512v1-abstract-short" style="display: inline;"> Large language models (LLMs) are widely adopted to generate synthetic datasets for various natural language processing (NLP) tasks, such as text classification and summarization. However, accurately measuring the diversity of these synthetic datasets-an aspect crucial for robust model performance-remains a significant challenge. In this paper, we introduce DCScore, a novel method for measuring syn&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08512v1-abstract-full').style.display = 'inline'; document.getElementById('2502.08512v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.08512v1-abstract-full" style="display: none;"> Large language models (LLMs) are widely adopted to generate synthetic datasets for various natural language processing (NLP) tasks, such as text classification and summarization. However, accurately measuring the diversity of these synthetic datasets-an aspect crucial for robust model performance-remains a significant challenge. In this paper, we introduce DCScore, a novel method for measuring synthetic dataset diversity from a classification perspective. Specifically, DCScore formulates diversity evaluation as a sample classification task, leveraging mutual relationships among samples. We further provide theoretical verification of the diversity-related axioms satisfied by DCScore, highlighting its role as a principled diversity evaluation method. Experimental results on synthetic datasets reveal that DCScore enjoys a stronger correlation with multiple diversity pseudo-truths of evaluated datasets, underscoring its effectiveness. Moreover, both empirical and theoretical evidence demonstrate that DCScore substantially reduces computational costs compared to existing approaches. Code is available at: https://github.com/BlueWhaleLab/DCScore. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08512v1-abstract-full').style.display = 'none'; document.getElementById('2502.08512v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08486">arXiv:2502.08486</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.08486">pdf</a>, <a href="https://arxiv.org/format/2502.08486">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Referring Remote Sensing Image Segmentation via Bidirectional Alignment Guided Joint Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+T">Tianxiang Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Wen%2C+Z">Zhaokun Wen</a>, <a href="/search/cs?searchtype=author&amp;query=Kong%2C+B">Bo Kong</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+K">Kecheng Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yisi Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhuang%2C+P">Peixian Zhuang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jiangyun Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.08486v1-abstract-short" style="display: inline;"> Referring Remote Sensing Image Segmentation (RRSIS) is critical for ecological monitoring, urban planning, and disaster management, requiring precise segmentation of objects in remote sensing imagery guided by textual descriptions. This task is uniquely challenging due to the considerable vision-language gap, the high spatial resolution and broad coverage of remote sensing imagery with diverse cat&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08486v1-abstract-full').style.display = 'inline'; document.getElementById('2502.08486v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.08486v1-abstract-full" style="display: none;"> Referring Remote Sensing Image Segmentation (RRSIS) is critical for ecological monitoring, urban planning, and disaster management, requiring precise segmentation of objects in remote sensing imagery guided by textual descriptions. This task is uniquely challenging due to the considerable vision-language gap, the high spatial resolution and broad coverage of remote sensing imagery with diverse categories and small targets, and the presence of clustered, unclear targets with blurred edges. To tackle these issues, we propose \ours, a novel framework designed to bridge the vision-language gap, enhance multi-scale feature interaction, and improve fine-grained object differentiation. Specifically, \ours introduces: (1) the Bidirectional Spatial Correlation (BSC) for improved vision-language feature alignment, (2) the Target-Background TwinStream Decoder (T-BTD) for precise distinction between targets and non-targets, and (3) the Dual-Modal Object Learning Strategy (D-MOLS) for robust multimodal feature reconstruction. Extensive experiments on the benchmark datasets RefSegRS and RRSIS-D demonstrate that \ours achieves state-of-the-art performance. Specifically, \ours improves the overall IoU (oIoU) by 3.76 percentage points (80.57) and 1.44 percentage points (79.23) on the two datasets, respectively. Additionally, it outperforms previous methods in the mean IoU (mIoU) by 5.37 percentage points (67.95) and 1.84 percentage points (66.04), effectively addressing the core challenges of RRSIS with enhanced precision and robustness. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08486v1-abstract-full').style.display = 'none'; document.getElementById('2502.08486v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08378">arXiv:2502.08378</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.08378">pdf</a>, <a href="https://arxiv.org/format/2502.08378">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Learning Humanoid Standing-up Control across Diverse Postures </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Huang%2C+T">Tao Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Ren%2C+J">Junli Ren</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+H">Huayi Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Z">Zirui Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Ben%2C+Q">Qingwei Ben</a>, <a href="/search/cs?searchtype=author&amp;query=Wen%2C+M">Muning Wen</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+X">Xiao Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jianan Li</a>, <a href="/search/cs?searchtype=author&amp;query=Pang%2C+J">Jiangmiao Pang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.08378v1-abstract-short" style="display: inline;"> Standing-up control is crucial for humanoid robots, with the potential for integration into current locomotion and loco-manipulation systems, such as fall recovery. Existing approaches are either limited to simulations that overlook hardware constraints or rely on predefined ground-specific motion trajectories, failing to enable standing up across postures in real-world scenes. To bridge this gap,&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08378v1-abstract-full').style.display = 'inline'; document.getElementById('2502.08378v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.08378v1-abstract-full" style="display: none;"> Standing-up control is crucial for humanoid robots, with the potential for integration into current locomotion and loco-manipulation systems, such as fall recovery. Existing approaches are either limited to simulations that overlook hardware constraints or rely on predefined ground-specific motion trajectories, failing to enable standing up across postures in real-world scenes. To bridge this gap, we present HoST (Humanoid Standing-up Control), a reinforcement learning framework that learns standing-up control from scratch, enabling robust sim-to-real transfer across diverse postures. HoST effectively learns posture-adaptive motions by leveraging a multi-critic architecture and curriculum-based training on diverse simulated terrains. To ensure successful real-world deployment, we constrain the motion with smoothness regularization and implicit motion speed bound to alleviate oscillatory and violent motions on physical hardware, respectively. After simulation-based training, the learned control policies are directly deployed on the Unitree G1 humanoid robot. Our experimental results demonstrate that the controllers achieve smooth, stable, and robust standing-up motions across a wide range of laboratory and outdoor environments. Videos are available at https://taohuang13.github.io/humanoid-standingup.github.io/. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08378v1-abstract-full').style.display = 'none'; document.getElementById('2502.08378v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Humanoid Standing-up Control, 12 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08224">arXiv:2502.08224</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.08224">pdf</a>, <a href="https://arxiv.org/format/2502.08224">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Flow-of-Action: SOP Enhanced LLM-Based Multi-Agent System for Root Cause Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Pei%2C+C">Changhua Pei</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Z">Zexin Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+F">Fengrui Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Z">Zeyan Li</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Y">Yang Liu</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+X">Xiao He</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+R">Rong Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+T">Tieying Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+J">Jianjun Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jianhui Li</a>, <a href="/search/cs?searchtype=author&amp;query=Xie%2C+G">Gaogang Xie</a>, <a href="/search/cs?searchtype=author&amp;query=Pei%2C+D">Dan Pei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.08224v1-abstract-short" style="display: inline;"> In the realm of microservices architecture, the occurrence of frequent incidents necessitates the employment of Root Cause Analysis (RCA) for swift issue resolution. It is common that a serious incident can take several domain experts hours to identify the root cause. Consequently, a contemporary trend involves harnessing Large Language Models (LLMs) as automated agents for RCA. Though the recent&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08224v1-abstract-full').style.display = 'inline'; document.getElementById('2502.08224v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.08224v1-abstract-full" style="display: none;"> In the realm of microservices architecture, the occurrence of frequent incidents necessitates the employment of Root Cause Analysis (RCA) for swift issue resolution. It is common that a serious incident can take several domain experts hours to identify the root cause. Consequently, a contemporary trend involves harnessing Large Language Models (LLMs) as automated agents for RCA. Though the recent ReAct framework aligns well with the Site Reliability Engineers (SREs) for its thought-action-observation paradigm, its hallucinations often lead to irrelevant actions and directly affect subsequent results. Additionally, the complex and variable clues of the incident can overwhelm the model one step further. To confront these challenges, we propose Flow-of-Action, a pioneering Standard Operation Procedure (SOP) enhanced LLM-based multi-agent system. By explicitly summarizing the diagnosis steps of SREs, SOP imposes constraints on LLMs at crucial junctures, guiding the RCA process towards the correct trajectory. To facilitate the rational and effective utilization of SOPs, we design an SOP-centric framework called SOP flow. SOP flow contains a series of tools, including one for finding relevant SOPs for incidents, another for automatically generating SOPs for incidents without relevant ones, and a tool for converting SOPs into code. This significantly alleviates the hallucination issues of ReAct in RCA tasks. We also design multiple auxiliary agents to assist the main agent by removing useless noise, narrowing the search space, and informing the main agent whether the RCA procedure can stop. Compared to the ReAct method&#39;s 35.50% accuracy, our Flow-of-Action method achieves 64.01%, meeting the accuracy requirements for RCA in real-world systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08224v1-abstract-full').style.display = 'none'; document.getElementById('2502.08224v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by WWW&#39;25 Industry Track</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> D.2.5 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07968">arXiv:2502.07968</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.07968">pdf</a>, <a href="https://arxiv.org/format/2502.07968">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Generative Risk Minimization for Out-of-Distribution Generalization on Graphs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+S">Song Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Tan%2C+Z">Zhen Tan</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+Y">Yaochen Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+C">Chuxu Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jundong Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07968v1-abstract-short" style="display: inline;"> Out-of-distribution (OOD) generalization on graphs aims at dealing with scenarios where the test graph distribution differs from the training graph distributions. Compared to i.i.d. data like images, the OOD generalization problem on graph-structured data remains challenging due to the non-i.i.d. property and complex structural information on graphs. Recently, several works on graph OOD generaliza&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07968v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07968v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07968v1-abstract-full" style="display: none;"> Out-of-distribution (OOD) generalization on graphs aims at dealing with scenarios where the test graph distribution differs from the training graph distributions. Compared to i.i.d. data like images, the OOD generalization problem on graph-structured data remains challenging due to the non-i.i.d. property and complex structural information on graphs. Recently, several works on graph OOD generalization have explored extracting invariant subgraphs that share crucial classification information across different distributions. Nevertheless, such a strategy could be suboptimal for entirely capturing the invariant information, as the extraction of discrete structures could potentially lead to the loss of invariant information or the involvement of spurious information. In this paper, we propose an innovative framework, named Generative Risk Minimization (GRM), designed to generate an invariant subgraph for each input graph to be classified, instead of extraction. To address the challenge of optimization in the absence of optimal invariant subgraphs (i.e., ground truths), we derive a tractable form of the proposed GRM objective by introducing a latent causal variable, and its effectiveness is validated by our theoretical analysis. We further conduct extensive experiments across a variety of real-world graph datasets for both node-level and graph-level OOD generalization, and the results demonstrate the superiority of our framework GRM. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07968v1-abstract-full').style.display = 'none'; document.getElementById('2502.07968v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">TMLR 02/2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07963">arXiv:2502.07963</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.07963">pdf</a>, <a href="https://arxiv.org/format/2502.07963">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Caught in the Web of Words: Do LLMs Fall for Spin in Medical Literature? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yun%2C+H+S">Hye Sun Yun</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+K+Y+C">Karen Y. C. Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Kouzy%2C+R">Ramez Kouzy</a>, <a href="/search/cs?searchtype=author&amp;query=Marshall%2C+I+J">Iain J. Marshall</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J+J">Junyi Jessy Li</a>, <a href="/search/cs?searchtype=author&amp;query=Wallace%2C+B+C">Byron C. Wallace</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07963v1-abstract-short" style="display: inline;"> Medical research faces well-documented challenges in translating novel treatments into clinical practice. Publishing incentives encourage researchers to present &#34;positive&#34; findings, even when empirical results are equivocal. Consequently, it is well-documented that authors often spin study results, especially in article abstracts. Such spin can influence clinician interpretation of evidence and ma&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07963v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07963v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07963v1-abstract-full" style="display: none;"> Medical research faces well-documented challenges in translating novel treatments into clinical practice. Publishing incentives encourage researchers to present &#34;positive&#34; findings, even when empirical results are equivocal. Consequently, it is well-documented that authors often spin study results, especially in article abstracts. Such spin can influence clinician interpretation of evidence and may affect patient care decisions. In this study, we ask whether the interpretation of trial results offered by Large Language Models (LLMs) is similarly affected by spin. This is important since LLMs are increasingly being used to trawl through and synthesize published medical evidence. We evaluated 22 LLMs and found that they are across the board more susceptible to spin than humans. They might also propagate spin into their outputs: We find evidence, e.g., that LLMs implicitly incorporate spin into plain language summaries that they generate. We also find, however, that LLMs are generally capable of recognizing spin, and can be prompted in a way to mitigate spin&#39;s impact on LLM outputs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07963v1-abstract-full').style.display = 'none'; document.getElementById('2502.07963v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">20 pages, 10 figures, 3 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07844">arXiv:2502.07844</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.07844">pdf</a>, <a href="https://arxiv.org/format/2502.07844">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> The establishment of static digital humans and the integration with spinal models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ju%2C+F">Fujiao Ju</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yuxuan Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+S">Shuo Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+C">Chengyin Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Y">Yinbo Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jianfeng Li</a>, <a href="/search/cs?searchtype=author&amp;query=Dong%2C+M">Mingjie Dong</a>, <a href="/search/cs?searchtype=author&amp;query=Fang%2C+B">Bin Fang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhuang%2C+Q">Qianyu Zhuang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07844v1-abstract-short" style="display: inline;"> Adolescent idiopathic scoliosis (AIS), a prevalent spinal deformity, significantly affects individuals&#39; health and quality of life. Conventional imaging techniques, such as X - rays, computed tomography (CT), and magnetic resonance imaging (MRI), offer static views of the spine. However, they are restricted in capturing the dynamic changes of the spine and its interactions with overall body motion&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07844v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07844v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07844v1-abstract-full" style="display: none;"> Adolescent idiopathic scoliosis (AIS), a prevalent spinal deformity, significantly affects individuals&#39; health and quality of life. Conventional imaging techniques, such as X - rays, computed tomography (CT), and magnetic resonance imaging (MRI), offer static views of the spine. However, they are restricted in capturing the dynamic changes of the spine and its interactions with overall body motion. Therefore, developing new techniques to address these limitations has become extremely important. Dynamic digital human modeling represents a major breakthrough in digital medicine. It enables a three - dimensional (3D) view of the spine as it changes during daily activities, assisting clinicians in detecting deformities that might be missed in static imaging. Although dynamic modeling holds great potential, constructing an accurate static digital human model is a crucial initial step for high - precision simulations. In this study, our focus is on constructing an accurate static digital human model integrating the spine, which is vital for subsequent dynamic digital human research on AIS. First, we generate human point - cloud data by combining the 3D Gaussian method with the Skinned Multi - Person Linear (SMPL) model from the patient&#39;s multi - view images. Then, we fit a standard skeletal model to the generated human model. Next, we align the real spine model reconstructed from CT images with the standard skeletal model. We validated the resulting personalized spine model using X - ray data from six AIS patients, with Cobb angles (used to measure the severity of scoliosis) as evaluation metrics. The results indicate that the model&#39;s error was within 1 degree of the actual measurements. This study presents an important method for constructing digital humans. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07844v1-abstract-full').style.display = 'none'; document.getElementById('2502.07844v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07839">arXiv:2502.07839</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.07839">pdf</a>, <a href="https://arxiv.org/format/2502.07839">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Optimal Actuator Attacks on Autonomous Vehicles Using Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+P">Pengyu Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jialu Li</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+L">Ling Shi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07839v1-abstract-short" style="display: inline;"> With the increasing prevalence of autonomous vehicles (AVs), their vulnerability to various types of attacks has grown, presenting significant security challenges. In this paper, we propose a reinforcement learning (RL)-based approach for designing optimal stealthy integrity attacks on AV actuators. We also analyze the limitations of state-of-the-art RL-based secure controllers developed to counte&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07839v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07839v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07839v1-abstract-full" style="display: none;"> With the increasing prevalence of autonomous vehicles (AVs), their vulnerability to various types of attacks has grown, presenting significant security challenges. In this paper, we propose a reinforcement learning (RL)-based approach for designing optimal stealthy integrity attacks on AV actuators. We also analyze the limitations of state-of-the-art RL-based secure controllers developed to counter such attacks. Through extensive simulation experiments, we demonstrate the effectiveness and efficiency of our proposed method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07839v1-abstract-full').style.display = 'none'; document.getElementById('2502.07839v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted in 2024 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS) Workshop</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07640">arXiv:2502.07640</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.07640">pdf</a>, <a href="https://arxiv.org/format/2502.07640">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Goedel-Prover: A Frontier Model for Open-Source Automated Theorem Proving </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lin%2C+Y">Yong Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Tang%2C+S">Shange Tang</a>, <a href="/search/cs?searchtype=author&amp;query=Lyu%2C+B">Bohan Lyu</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+J">Jiayun Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+H">Hongzhou Lin</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+K">Kaiyu Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jia Li</a>, <a href="/search/cs?searchtype=author&amp;query=Xia%2C+M">Mengzhou Xia</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+D">Danqi Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Arora%2C+S">Sanjeev Arora</a>, <a href="/search/cs?searchtype=author&amp;query=Jin%2C+C">Chi Jin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07640v2-abstract-short" style="display: inline;"> We introduce Goedel-Prover, an open-source large language model (LLM) that achieves the state-of-the-art (SOTA) performance in automated formal proof generation for mathematical problems. The key challenge in this field is the scarcity of formalized math statements and proofs, which we tackle in the following ways. We train statement formalizers to translate the natural language math problems from&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07640v2-abstract-full').style.display = 'inline'; document.getElementById('2502.07640v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07640v2-abstract-full" style="display: none;"> We introduce Goedel-Prover, an open-source large language model (LLM) that achieves the state-of-the-art (SOTA) performance in automated formal proof generation for mathematical problems. The key challenge in this field is the scarcity of formalized math statements and proofs, which we tackle in the following ways. We train statement formalizers to translate the natural language math problems from Numina into formal language (Lean 4), creating a dataset of 1.64 million formal statements. LLMs are used to check that the formal statements accurately preserve the content of the original natural language problems. We then iteratively build a large dataset of formal proofs by training a series of provers. Each prover succeeds in proving many statements that the previous ones could not, and these new proofs are added to the training set for the next prover. Despite using only supervised fine-tuning, our final prover significantly outperforms the previous best open-source model, DeepSeek-Prover-V1.5, which employs reinforcement learning. On the miniF2F benchmark, our model achieves a success rate of 57.6% (Pass@32), surpassing DeepSeek-Prover-V1.5 by 7.6%. On PutnamBench, Goedel-Prover successfully solves 7 problems (Pass@512), ranking first on the leaderboard. Furthermore, it generates 29.7K formal proofs for Lean Workbook problems, nearly doubling the 15.7K produced by earlier works. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07640v2-abstract-full').style.display = 'none'; document.getElementById('2502.07640v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07628">arXiv:2502.07628</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.07628">pdf</a>, <a href="https://arxiv.org/format/2502.07628">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3706598.3714159">10.1145/3706598.3714159 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> HarmonyCut: Supporting Creative Chinese Paper-cutting Design with Form and Connotation Harmony </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+H">Huanchen Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Qiu%2C+T">Tianrun Qiu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jiaping Li</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+Z">Zhicong Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+Y">Yuxin Ma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07628v2-abstract-short" style="display: inline;"> Chinese paper-cutting, an Intangible Cultural Heritage (ICH), faces challenges from the erosion of traditional culture due to the prevalence of realism alongside limited public access to cultural elements. While generative AI can enhance paper-cutting design with its extensive knowledge base and efficient production capabilities, it often struggles to align content with cultural meaning due to use&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07628v2-abstract-full').style.display = 'inline'; document.getElementById('2502.07628v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07628v2-abstract-full" style="display: none;"> Chinese paper-cutting, an Intangible Cultural Heritage (ICH), faces challenges from the erosion of traditional culture due to the prevalence of realism alongside limited public access to cultural elements. While generative AI can enhance paper-cutting design with its extensive knowledge base and efficient production capabilities, it often struggles to align content with cultural meaning due to users&#39; and models&#39; lack of comprehensive paper-cutting knowledge. To address these issues, we conducted a formative study (N=7) to identify the workflow and design space, including four core factors (Function, Subject Matter, Style, and Method of Expression) and a key element (Pattern). We then developed HarmonyCut, a generative AI-based tool that translates abstract intentions into creative and structured ideas. This tool facilitates the exploration of suggested related content (knowledge, works, and patterns), enabling users to select, combine, and adjust elements for creative paper-cutting design. A user study (N=16) and an expert evaluation (N=3) demonstrated that HarmonyCut effectively provided relevant knowledge, aiding the ideation of diverse paper-cutting designs and maintaining design quality within the design space to ensure alignment between form and cultural connotation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07628v2-abstract-full').style.display = 'none'; document.getElementById('2502.07628v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">22 pages, 13 figures, 6 tables. Conditionally accepted to ACM CHI 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07527">arXiv:2502.07527</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.07527">pdf</a>, <a href="https://arxiv.org/format/2502.07527">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> NatureLM: Deciphering the Language of Nature for Scientific Discovery </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xia%2C+Y">Yingce Xia</a>, <a href="/search/cs?searchtype=author&amp;query=Jin%2C+P">Peiran Jin</a>, <a href="/search/cs?searchtype=author&amp;query=Xie%2C+S">Shufang Xie</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+L">Liang He</a>, <a href="/search/cs?searchtype=author&amp;query=Cao%2C+C">Chuan Cao</a>, <a href="/search/cs?searchtype=author&amp;query=Luo%2C+R">Renqian Luo</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+G">Guoqing Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yue Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Z">Zequn Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Y">Yuan-Jyue Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+Z">Zekun Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Bai%2C+Y">Yeqi Bai</a>, <a href="/search/cs?searchtype=author&amp;query=Deng%2C+P">Pan Deng</a>, <a href="/search/cs?searchtype=author&amp;query=Min%2C+Y">Yaosen Min</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+Z">Ziheng Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Hao%2C+H">Hongxia Hao</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+H">Han Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jielan Li</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+C">Chang Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+J">Jia Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+J">Jianwei Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+K">Kehan Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+W">Wei Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Gao%2C+K">Kaiyuan Gao</a>, <a href="/search/cs?searchtype=author&amp;query=Pei%2C+Q">Qizhi Pei</a> , et al. (20 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07527v1-abstract-short" style="display: inline;"> Foundation models have revolutionized natural language processing and artificial intelligence, significantly enhancing how machines comprehend and generate human languages. Inspired by the success of these foundation models, researchers have developed foundation models for individual scientific domains, including small molecules, materials, proteins, DNA, and RNA. However, these models are typical&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07527v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07527v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07527v1-abstract-full" style="display: none;"> Foundation models have revolutionized natural language processing and artificial intelligence, significantly enhancing how machines comprehend and generate human languages. Inspired by the success of these foundation models, researchers have developed foundation models for individual scientific domains, including small molecules, materials, proteins, DNA, and RNA. However, these models are typically trained in isolation, lacking the ability to integrate across different scientific domains. Recognizing that entities within these domains can all be represented as sequences, which together form the &#34;language of nature&#34;, we introduce Nature Language Model (briefly, NatureLM), a sequence-based science foundation model designed for scientific discovery. Pre-trained with data from multiple scientific domains, NatureLM offers a unified, versatile model that enables various applications including: (i) generating and optimizing small molecules, proteins, RNA, and materials using text instructions; (ii) cross-domain generation/design, such as protein-to-molecule and protein-to-RNA generation; and (iii) achieving state-of-the-art performance in tasks like SMILES-to-IUPAC translation and retrosynthesis on USPTO-50k. NatureLM offers a promising generalist approach for various scientific tasks, including drug discovery (hit generation/optimization, ADMET optimization, synthesis), novel material design, and the development of therapeutic proteins or nucleotides. We have developed NatureLM models in different sizes (1 billion, 8 billion, and 46.7 billion parameters) and observed a clear improvement in performance as the model size increases. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07527v1-abstract-full').style.display = 'none'; document.getElementById('2502.07527v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">81 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07490">arXiv:2502.07490</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.07490">pdf</a>, <a href="https://arxiv.org/format/2502.07490">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Mask-Enhanced Autoregressive Prediction: Pay Less Attention to Learn More </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhuang%2C+X">Xialie Zhuang</a>, <a href="/search/cs?searchtype=author&amp;query=Jia%2C+Z">Zhikai Jia</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jianjin Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Zhenyu Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Shen%2C+L">Li Shen</a>, <a href="/search/cs?searchtype=author&amp;query=Cao%2C+Z">Zheng Cao</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+S">Shiwei Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07490v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) are discovered to suffer from accurately retrieving key information. To address this, we propose Mask-Enhanced Autoregressive Prediction (MEAP), a simple yet effective training paradigm that seamlessly integrates Masked Language Modeling (MLM) into Next-Token Prediction (NTP) to enhance the latter&#39;s in-context retrieval capabilities. Specifically, MEAP first randomly m&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07490v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07490v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07490v1-abstract-full" style="display: none;"> Large Language Models (LLMs) are discovered to suffer from accurately retrieving key information. To address this, we propose Mask-Enhanced Autoregressive Prediction (MEAP), a simple yet effective training paradigm that seamlessly integrates Masked Language Modeling (MLM) into Next-Token Prediction (NTP) to enhance the latter&#39;s in-context retrieval capabilities. Specifically, MEAP first randomly masks a small fraction of input tokens and then directly performs the standard next-token prediction autoregressive using a decoder-only Transformer. MEAP eliminates the need for bidirectional attention or encoder-decoder architectures for MLM, incurring no additional computational overhead during pre-training or inference. Intensive experiments demonstrate that MEAP substantially outperforms NTP on key information retrieval and long-context reasoning tasks, while performing on par or better on commonsense reasoning tasks. The benefits of MEAP also extend to supervised fine-tuning, where it shows remarkable advantages in lost-in-the-middle scenarios, outperforming NTP by 11.77 percentage points. Our analysis indicates that MEAP&#39;s effectiveness arises from its ability to promote more distinguishable attention scores by concentrating on a reduced set of non-masked tokens. This mechanism improves the model&#39;s focus on task-relevant signals while mitigating the influence of peripheral context. These findings position MEAP as a promising training paradigm for large language models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07490v1-abstract-full').style.display = 'none'; document.getElementById('2502.07490v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages,7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07388">arXiv:2502.07388</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.07388">pdf</a>, <a href="https://arxiv.org/format/2502.07388">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> </div> </div> <p class="title is-5 mathjax"> UAV-assisted Joint Mobile Edge Computing and Data Collection via Matching-enabled Deep Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+B">Boxiong Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+H">Hui Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jiahui Li</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+G">Geng Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Sun%2C+Z">Zemin Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+J">Jiacheng Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Niyato%2C+D">Dusit Niyato</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07388v1-abstract-short" style="display: inline;"> Unmanned aerial vehicle (UAV)-assisted mobile edge computing (MEC) and data collection (DC) have been popular research issues. Different from existing works that consider MEC and DC scenarios separately, this paper investigates a multi-UAV-assisted joint MEC-DC system. Specifically, we formulate a joint optimization problem to minimize the MEC latency and maximize the collected data volume. This p&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07388v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07388v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07388v1-abstract-full" style="display: none;"> Unmanned aerial vehicle (UAV)-assisted mobile edge computing (MEC) and data collection (DC) have been popular research issues. Different from existing works that consider MEC and DC scenarios separately, this paper investigates a multi-UAV-assisted joint MEC-DC system. Specifically, we formulate a joint optimization problem to minimize the MEC latency and maximize the collected data volume. This problem can be classified as a non-convex mixed integer programming problem that exhibits long-term optimization and dynamics. Thus, we propose a deep reinforcement learning-based approach that jointly optimizes the UAV movement, user transmit power, and user association in real time to solve the problem efficiently. Specifically, we reformulate the optimization problem into an action space-reduced Markov decision process (MDP) and optimize the user association by using a two-phase matching-based association (TMA) strategy. Subsequently, we propose a soft actor-critic (SAC)-based approach that integrates the proposed TMA strategy (SAC-TMA) to solve the formulated joint optimization problem collaboratively. Simulation results demonstrate that the proposed SAC-TMA is able to coordinate the two subsystems and can effectively reduce the system latency and improve the data collection volume compared with other benchmark algorithms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07388v1-abstract-full').style.display = 'none'; document.getElementById('2502.07388v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages, 10 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07365">arXiv:2502.07365</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.07365">pdf</a>, <a href="https://arxiv.org/format/2502.07365">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> LongReD: Mitigating Short-Text Degradation of Long-Context Large Language Models via Restoration Distillation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Dong%2C+Z">Zican Dong</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Junyi Li</a>, <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+J">Jinhao Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+M">Mingyu Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+W+X">Wayne Xin Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+B">Bingning Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+W">Weipeng Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07365v1-abstract-short" style="display: inline;"> Large language models (LLMs) have gained extended context windows through scaling positional encodings and lightweight continual pre-training. However, this often leads to degraded performance on short-text tasks, while the reasons for this degradation remain insufficiently explored. In this work, we identify two primary factors contributing to this issue: distribution drift in hidden states and a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07365v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07365v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07365v1-abstract-full" style="display: none;"> Large language models (LLMs) have gained extended context windows through scaling positional encodings and lightweight continual pre-training. However, this often leads to degraded performance on short-text tasks, while the reasons for this degradation remain insufficiently explored. In this work, we identify two primary factors contributing to this issue: distribution drift in hidden states and attention scores, and catastrophic forgetting during continual pre-training. To address these challenges, we propose Long Context Pre-training with Restoration Distillation (LongReD), a novel approach designed to mitigate short-text performance degradation through minimizing the distribution discrepancy between the extended and original models. Besides training on long texts, LongReD distills the hidden state of selected layers from the original model on short texts. Additionally, LongReD also introduces a short-to-long distillation, aligning the output distribution on short texts with that on long texts by leveraging skipped positional indices. Experiments on common text benchmarks demonstrate that LongReD effectively preserves the model&#39;s short-text performance while maintaining comparable or even better capacity to handle long texts than baselines. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07365v1-abstract-full').style.display = 'none'; document.getElementById('2502.07365v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07316">arXiv:2502.07316</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.07316">pdf</a>, <a href="https://arxiv.org/format/2502.07316">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> CodeI/O: Condensing Reasoning Patterns via Code Input-Output Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Junlong Li</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+D">Daya Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+D">Dejian Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+R">Runxin Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Y">Yu Wu</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+J">Junxian He</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07316v2-abstract-short" style="display: inline;"> Reasoning is a fundamental capability of Large Language Models. While prior research predominantly focuses on enhancing narrow skills like math or code generation, improving performance on many other reasoning tasks remains challenging due to sparse and fragmented training data. To address this issue, we propose CodeI/O, a novel approach that systematically condenses diverse reasoning patterns inh&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07316v2-abstract-full').style.display = 'inline'; document.getElementById('2502.07316v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07316v2-abstract-full" style="display: none;"> Reasoning is a fundamental capability of Large Language Models. While prior research predominantly focuses on enhancing narrow skills like math or code generation, improving performance on many other reasoning tasks remains challenging due to sparse and fragmented training data. To address this issue, we propose CodeI/O, a novel approach that systematically condenses diverse reasoning patterns inherently embedded in contextually-grounded codes, through transforming the original code into a code input-output prediction format. By training models to predict inputs/outputs given code and test cases entirely in natural language as Chain-of-Thought (CoT) rationales, we expose them to universal reasoning primitives -- like logic flow planning, state-space searching, decision tree traversal, and modular decomposition -- while decoupling structured reasoning from code-specific syntax and preserving procedural rigor. Experimental results demonstrate CodeI/O leads to consistent improvements across symbolic, scientific, logic, math &amp; numerical, and commonsense reasoning tasks. By matching the existing ground-truth outputs or re-executing the code with predicted inputs, we can verify each prediction and further enhance the CoTs through multi-turn revision, resulting in CodeI/O++ and achieving higher performance. Our data and models are available at https://github.com/hkust-nlp/CodeIO. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07316v2-abstract-full').style.display = 'none'; document.getElementById('2502.07316v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07295">arXiv:2502.07295</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.07295">pdf</a>, <a href="https://arxiv.org/format/2502.07295">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Treatment Effect Estimation for Exponential Family Outcomes using Neural Networks with Targeted Regularization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jiahong Li</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+Z">Zeqin Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Dan%2C+J">Jiayi Dan</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+J">Jixing Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Zou%2C+Z">Zhichao Zou</a>, <a href="/search/cs?searchtype=author&amp;query=Zhen%2C+P">Peng Zhen</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+J">Jiecheng Guo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07295v1-abstract-short" style="display: inline;"> Neural Networks (NNs) have became a natural choice for treatment effect estimation due to their strong approximation capabilities. Nevertheless, how to design NN-based estimators with desirable properties, such as low bias and doubly robustness, still remains a significant challenge. A common approach to address this is targeted regularization, which modifies the objective function of NNs. However&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07295v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07295v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07295v1-abstract-full" style="display: none;"> Neural Networks (NNs) have became a natural choice for treatment effect estimation due to their strong approximation capabilities. Nevertheless, how to design NN-based estimators with desirable properties, such as low bias and doubly robustness, still remains a significant challenge. A common approach to address this is targeted regularization, which modifies the objective function of NNs. However, existing works on targeted regularization are limited to Gaussian-distributed outcomes, significantly restricting their applicability in real-world scenarios. In this work, we aim to bridge this blank by extending this framework to the boarder exponential family outcomes. Specifically, we first derive the von-Mises expansion of the Average Dose function of Canonical Functions (ADCF), which inspires us how to construct a doubly robust estimator with good properties. Based on this, we develop a NN-based estimator for ADCF by generalizing functional targeted regularization to exponential families, and provide the corresponding theoretical convergence rate. Extensive experimental results demonstrate the effectiveness of our proposed model. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07295v1-abstract-full').style.display = 'none'; document.getElementById('2502.07295v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07289">arXiv:2502.07289</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.07289">pdf</a>, <a href="https://arxiv.org/format/2502.07289">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Learning Inverse Laplacian Pyramid for Progressive Depth Completion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+K">Kun Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Yan%2C+Z">Zhiqiang Yan</a>, <a href="/search/cs?searchtype=author&amp;query=Fan%2C+J">Junkai Fan</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jun Li</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jian Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07289v1-abstract-short" style="display: inline;"> Depth completion endeavors to reconstruct a dense depth map from sparse depth measurements, leveraging the information provided by a corresponding color image. Existing approaches mostly hinge on single-scale propagation strategies that iteratively ameliorate initial coarse depth estimates through pixel-level message passing. Despite their commendable outcomes, these techniques are frequently hamp&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07289v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07289v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07289v1-abstract-full" style="display: none;"> Depth completion endeavors to reconstruct a dense depth map from sparse depth measurements, leveraging the information provided by a corresponding color image. Existing approaches mostly hinge on single-scale propagation strategies that iteratively ameliorate initial coarse depth estimates through pixel-level message passing. Despite their commendable outcomes, these techniques are frequently hampered by computational inefficiencies and a limited grasp of scene context. To circumvent these challenges, we introduce LP-Net, an innovative framework that implements a multi-scale, progressive prediction paradigm based on Laplacian Pyramid decomposition. Diverging from propagation-based approaches, LP-Net initiates with a rudimentary, low-resolution depth prediction to encapsulate the global scene context, subsequently refining this through successive upsampling and the reinstatement of high-frequency details at incremental scales. We have developed two novel modules to bolster this strategy: 1) the Multi-path Feature Pyramid module, which segregates feature maps into discrete pathways, employing multi-scale transformations to amalgamate comprehensive spatial information, and 2) the Selective Depth Filtering module, which dynamically learns to apply both smoothness and sharpness filters to judiciously mitigate noise while accentuating intricate details. By integrating these advancements, LP-Net not only secures state-of-the-art (SOTA) performance across both outdoor and indoor benchmarks such as KITTI, NYUv2, and TOFDC, but also demonstrates superior computational efficiency. At the time of submission, LP-Net ranks 1st among all peer-reviewed methods on the official KITTI leaderboard. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07289v1-abstract-full').style.display = 'none'; document.getElementById('2502.07289v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06887">arXiv:2502.06887</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.06887">pdf</a>, <a href="https://arxiv.org/ps/2502.06887">ps</a>, <a href="https://arxiv.org/format/2502.06887">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Gradient Based Method for the Fusion of Lattice Quantizers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+L">Liyuan Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Cao%2C+H">Hanzhong Cao</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jiaheng Li</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+M">Minyang Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06887v1-abstract-short" style="display: inline;"> In practical applications, lattice quantizers leverage discrete lattice points to approximate arbitrary points in the lattice. An effective lattice quantizer significantly enhances both the accuracy and efficiency of these approximations. In the context of high-dimensional lattice quantization, previous work proposed utilizing low-dimensional optimal lattice quantizers and addressed the challenge&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06887v1-abstract-full').style.display = 'inline'; document.getElementById('2502.06887v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06887v1-abstract-full" style="display: none;"> In practical applications, lattice quantizers leverage discrete lattice points to approximate arbitrary points in the lattice. An effective lattice quantizer significantly enhances both the accuracy and efficiency of these approximations. In the context of high-dimensional lattice quantization, previous work proposed utilizing low-dimensional optimal lattice quantizers and addressed the challenge of determining the optimal length ratio in orthogonal splicing. Notably, it was demonstrated that fixed length ratios and orthogonality yield suboptimal results when combining low-dimensional lattices. Building on this foundation, another approach employed gradient descent to identify optimal lattices, which inspired us to explore the use of neural networks to discover matrices that outperform those obtained from orthogonal splicing methods. We propose two novel approaches to tackle this problem: the Household Algorithm and the Matrix Exp Algorithm. Our results indicate that both the Household Algorithm and the Matrix Exp Algorithm achieve improvements in lattice quantizers across dimensions 13, 15, 17 to 19, 21, and 22. Moreover, the Matrix Exp Algorithm demonstrates superior efficacy in high-dimensional settings. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06887v1-abstract-full').style.display = 'none'; document.getElementById('2502.06887v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06820">arXiv:2502.06820</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.06820">pdf</a>, <a href="https://arxiv.org/format/2502.06820">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> LoCA: Location-Aware Cosine Adaptation for Parameter-Efficient Fine-Tuning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Du%2C+Z">Zhekai Du</a>, <a href="/search/cs?searchtype=author&amp;query=Min%2C+Y">Yinjie Min</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jingjing Li</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+K">Ke Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Zou%2C+C">Changliang Zou</a>, <a href="/search/cs?searchtype=author&amp;query=Peng%2C+L">Liuhua Peng</a>, <a href="/search/cs?searchtype=author&amp;query=Chu%2C+T">Tingjin Chu</a>, <a href="/search/cs?searchtype=author&amp;query=Gong%2C+M">Mingming Gong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06820v1-abstract-short" style="display: inline;"> Low-rank adaptation (LoRA) has become a prevalent method for adapting pre-trained large language models to downstream tasks. However, the simple low-rank decomposition form may constrain the hypothesis space. To address this limitation, we introduce Location-aware Cosine Adaptation (LoCA), a novel frequency-domain parameter-efficient fine-tuning method based on inverse Discrete Cosine Transform (i&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06820v1-abstract-full').style.display = 'inline'; document.getElementById('2502.06820v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06820v1-abstract-full" style="display: none;"> Low-rank adaptation (LoRA) has become a prevalent method for adapting pre-trained large language models to downstream tasks. However, the simple low-rank decomposition form may constrain the hypothesis space. To address this limitation, we introduce Location-aware Cosine Adaptation (LoCA), a novel frequency-domain parameter-efficient fine-tuning method based on inverse Discrete Cosine Transform (iDCT) with selective locations of learnable components. We begin with a comprehensive theoretical comparison between frequency-domain and low-rank decompositions for fine-tuning pre-trained large models. Our analysis reveals that frequency-domain approximation with carefully selected frequency components can surpass the expressivity of traditional low-rank-based methods. Furthermore, we demonstrate that iDCT offers a more efficient implementation compared to inverse Discrete Fourier Transform (iDFT), allowing for better selection and tuning of frequency components while maintaining equivalent expressivity to the optimal iDFT-based adaptation. By employing finite-difference approximation to estimate gradients for discrete locations of learnable coefficients on the DCT spectrum, LoCA dynamically selects the most informative frequency components during training. Experiments on diverse language and vision fine-tuning tasks demonstrate that LoCA offers enhanced parameter efficiency while maintains computational feasibility comparable to low-rank-based methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06820v1-abstract-full').style.display = 'none'; document.getElementById('2502.06820v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06619">arXiv:2502.06619</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.06619">pdf</a>, <a href="https://arxiv.org/format/2502.06619">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.3390/s25020552">10.3390/s25020552 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Unleashing the Potential of Pre-Trained Diffusion Models for Generalizable Person Re-Identification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jiachen Li</a>, <a href="/search/cs?searchtype=author&amp;query=Gong%2C+X">Xiaojin Gong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06619v2-abstract-short" style="display: inline;"> Domain-generalizable re-identification (DG Re-ID) aims to train a model on one or more source domains and evaluate its performance on unseen target domains, a task that has attracted growing attention due to its practical relevance. While numerous methods have been proposed, most rely on discriminative or contrastive learning frameworks to learn generalizable feature representations. However, thes&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06619v2-abstract-full').style.display = 'inline'; document.getElementById('2502.06619v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06619v2-abstract-full" style="display: none;"> Domain-generalizable re-identification (DG Re-ID) aims to train a model on one or more source domains and evaluate its performance on unseen target domains, a task that has attracted growing attention due to its practical relevance. While numerous methods have been proposed, most rely on discriminative or contrastive learning frameworks to learn generalizable feature representations. However, these approaches often fail to mitigate shortcut learning, leading to suboptimal performance. In this work, we propose a novel method called diffusion model-assisted representation learning with a correlation-aware conditioning scheme (DCAC) to enhance DG Re-ID. Our method integrates a discriminative and contrastive Re-ID model with a pre-trained diffusion model through a correlation-aware conditioning scheme. By incorporating ID classification probabilities generated from the Re-ID model with a set of learnable ID-wise prompts, the conditioning scheme injects dark knowledge that captures ID correlations to guide the diffusion process. Simultaneously, feedback from the diffusion model is back-propagated through the conditioning scheme to the Re-ID model, effectively improving the generalization capability of Re-ID features. Extensive experiments on both single-source and multi-source DG Re-ID tasks demonstrate that our method achieves state-of-the-art performance. Comprehensive ablation studies further validate the effectiveness of the proposed approach, providing insights into its robustness. Codes will be available at https://github.com/RikoLi/DCAC. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06619v2-abstract-full').style.display = 'none'; document.getElementById('2502.06619v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06282">arXiv:2502.06282</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.06282">pdf</a>, <a href="https://arxiv.org/format/2502.06282">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Jakiro: Boosting Speculative Decoding with Decoupled Multi-Head via MoE </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Huang%2C+H">Haiduo Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+F">Fuwei Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Z">Zhenhua Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+Y">Yixing Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jinze Li</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Y">Yang Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Yin%2C+X">Xuanwu Yin</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+D">Dong Li</a>, <a href="/search/cs?searchtype=author&amp;query=Ren%2C+P">Pengju Ren</a>, <a href="/search/cs?searchtype=author&amp;query=Barsoum%2C+E">Emad Barsoum</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06282v1-abstract-short" style="display: inline;"> Speculative decoding (SD) accelerates large language model inference by using a smaller draft model to predict multiple tokens, which are then verified in parallel by the larger target model. However, the limited capacity of the draft model often necessitates tree-based sampling to improve prediction accuracy, where multiple candidates are generated at each step. We identify a key limitation in th&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06282v1-abstract-full').style.display = 'inline'; document.getElementById('2502.06282v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06282v1-abstract-full" style="display: none;"> Speculative decoding (SD) accelerates large language model inference by using a smaller draft model to predict multiple tokens, which are then verified in parallel by the larger target model. However, the limited capacity of the draft model often necessitates tree-based sampling to improve prediction accuracy, where multiple candidates are generated at each step. We identify a key limitation in this approach: the candidates at the same step are derived from the same representation, limiting diversity and reducing overall effectiveness. To address this, we propose Jakiro, leveraging Mixture of Experts (MoE), where independent experts generate diverse predictions, effectively decoupling correlations among candidates. Furthermore, we introduce a hybrid inference strategy, combining autoregressive decoding for initial tokens with parallel decoding for subsequent stages, and enhance the latter with contrastive mechanism in features to improve accuracy. Our method significantly boosts prediction accuracy and achieves higher inference speedups. Extensive experiments across diverse models validate the effectiveness and robustness of our approach, establishing a new SOTA in speculative decoding. Our codes are available at https://github.com/haiduo/Jakiro. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06282v1-abstract-full').style.display = 'none'; document.getElementById('2502.06282v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06255">arXiv:2502.06255</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.06255">pdf</a>, <a href="https://arxiv.org/format/2502.06255">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Towards Efficient and Intelligent Laser Weeding: Method and Dataset for Weed Stem Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Liu%2C+D">Dingning Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jinzhe Li</a>, <a href="/search/cs?searchtype=author&amp;query=Su%2C+H">Haoyang Su</a>, <a href="/search/cs?searchtype=author&amp;query=Cui%2C+B">Bei Cui</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Z">Zhihui Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Yuan%2C+Q">Qingbo Yuan</a>, <a href="/search/cs?searchtype=author&amp;query=Ouyang%2C+W">Wanli Ouyang</a>, <a href="/search/cs?searchtype=author&amp;query=Dong%2C+N">Nanqing Dong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06255v1-abstract-short" style="display: inline;"> Weed control is a critical challenge in modern agriculture, as weeds compete with crops for essential nutrient resources, significantly reducing crop yield and quality. Traditional weed control methods, including chemical and mechanical approaches, have real-life limitations such as associated environmental impact and efficiency. An emerging yet effective approach is laser weeding, which uses a la&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06255v1-abstract-full').style.display = 'inline'; document.getElementById('2502.06255v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06255v1-abstract-full" style="display: none;"> Weed control is a critical challenge in modern agriculture, as weeds compete with crops for essential nutrient resources, significantly reducing crop yield and quality. Traditional weed control methods, including chemical and mechanical approaches, have real-life limitations such as associated environmental impact and efficiency. An emerging yet effective approach is laser weeding, which uses a laser beam as the stem cutter. Although there have been studies that use deep learning in weed recognition, its application in intelligent laser weeding still requires a comprehensive understanding. Thus, this study represents the first empirical investigation of weed recognition for laser weeding. To increase the efficiency of laser beam cut and avoid damaging the crops of interest, the laser beam shall be directly aimed at the weed root. Yet, weed stem detection remains an under-explored problem. We integrate the detection of crop and weed with the localization of weed stem into one end-to-end system. To train and validate the proposed system in a real-life scenario, we curate and construct a high-quality weed stem detection dataset with human annotations. The dataset consists of 7,161 high-resolution pictures collected in the field with annotations of 11,151 instances of weed. Experimental results show that the proposed system improves weeding accuracy by 6.7% and reduces energy cost by 32.3% compared to existing weed recognition systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06255v1-abstract-full').style.display = 'none'; document.getElementById('2502.06255v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by AAAI-AISI 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06197">arXiv:2502.06197</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.06197">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Timing Matters: How Using LLMs at Different Timings Influences Writers&#39; Perceptions and Ideation Outcomes in AI-Assisted Ideation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Qin%2C+P">Peinuan Qin</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+C">Chi-Lan Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jingshu Li</a>, <a href="/search/cs?searchtype=author&amp;query=Wen%2C+J">Jing Wen</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+Y">Yi-Chieh Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06197v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) have been widely used to support ideation in the writing process. However, whether generating ideas with the help of LLMs leads to idea fixation or idea expansion is unclear. This study examines how different timings of LLM usage - either at the beginning or after independent ideation - affect people&#39;s perceptions and ideation outcomes in a writing task. In a controlle&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06197v1-abstract-full').style.display = 'inline'; document.getElementById('2502.06197v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06197v1-abstract-full" style="display: none;"> Large Language Models (LLMs) have been widely used to support ideation in the writing process. However, whether generating ideas with the help of LLMs leads to idea fixation or idea expansion is unclear. This study examines how different timings of LLM usage - either at the beginning or after independent ideation - affect people&#39;s perceptions and ideation outcomes in a writing task. In a controlled experiment with 60 participants, we found that using LLMs from the beginning reduced the number of original ideas and lowered creative self-efficacy and self-credit, mediated by changes in autonomy and ownership. We discuss the challenges and opportunities associated with using LLMs to assist in idea generation. We propose delaying the use of LLMs to support ideation while considering users&#39; self-efficacy, autonomy, and ownership of the ideation outcomes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06197v1-abstract-full').style.display = 'none'; document.getElementById('2502.06197v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06164">arXiv:2502.06164</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.06164">pdf</a>, <a href="https://arxiv.org/format/2502.06164">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Generalized Temporal Tensor Decomposition with Rank-revealing Latent-ODE </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chen%2C+P">Panqi Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+L">Lei Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jianlong Li</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+W">Weichang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+W">Weiqing Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Bian%2C+J">Jiang Bian</a>, <a href="/search/cs?searchtype=author&amp;query=Fang%2C+S">Shikai Fang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06164v1-abstract-short" style="display: inline;"> Tensor decomposition is a fundamental tool for analyzing multi-dimensional data by learning low-rank factors to represent high-order interactions. While recent works on temporal tensor decomposition have made significant progress by incorporating continuous timestamps in latent factors, they still struggle with general tensor data with continuous indexes not only in the temporal mode but also in o&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06164v1-abstract-full').style.display = 'inline'; document.getElementById('2502.06164v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06164v1-abstract-full" style="display: none;"> Tensor decomposition is a fundamental tool for analyzing multi-dimensional data by learning low-rank factors to represent high-order interactions. While recent works on temporal tensor decomposition have made significant progress by incorporating continuous timestamps in latent factors, they still struggle with general tensor data with continuous indexes not only in the temporal mode but also in other modes, such as spatial coordinates in climate data. Additionally, the problem of determining the tensor rank remains largely unexplored in temporal tensor models. To address these limitations, we propose \underline{G}eneralized temporal tensor decomposition with \underline{R}ank-r\underline{E}vealing laten\underline{T}-ODE (GRET). Our approach encodes continuous spatial indexes as learnable Fourier features and employs neural ODEs in latent space to learn the temporal trajectories of factors. To automatically reveal the rank of temporal tensors, we introduce a rank-revealing Gaussian-Gamma prior over the factor trajectories. We develop an efficient variational inference scheme with an analytical evidence lower bound, enabling sampling-free optimization. Through extensive experiments on both synthetic and real-world datasets, we demonstrate that GRET not only reveals the underlying ranks of temporal tensors but also significantly outperforms existing methods in prediction performance and robustness against noise. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06164v1-abstract-full').style.display = 'none'; document.getElementById('2502.06164v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05932">arXiv:2502.05932</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.05932">pdf</a>, <a href="https://arxiv.org/format/2502.05932">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Skill Expansion and Composition in Parameter Space </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Liu%2C+T">Tenglong Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jianxiong Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+Y">Yinan Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Niu%2C+H">Haoyi Niu</a>, <a href="/search/cs?searchtype=author&amp;query=Lan%2C+Y">Yixing Lan</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+X">Xin Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhan%2C+X">Xianyuan Zhan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05932v1-abstract-short" style="display: inline;"> Humans excel at reusing prior knowledge to address new challenges and developing skills while solving problems. This paradigm becomes increasingly popular in the development of autonomous agents, as it develops systems that can self-evolve in response to new challenges like human beings. However, previous methods suffer from limited training efficiency when expanding new skills and fail to fully l&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05932v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05932v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05932v1-abstract-full" style="display: none;"> Humans excel at reusing prior knowledge to address new challenges and developing skills while solving problems. This paradigm becomes increasingly popular in the development of autonomous agents, as it develops systems that can self-evolve in response to new challenges like human beings. However, previous methods suffer from limited training efficiency when expanding new skills and fail to fully leverage prior knowledge to facilitate new task learning. In this paper, we propose Parametric Skill Expansion and Composition (PSEC), a new framework designed to iteratively evolve the agents&#39; capabilities and efficiently address new challenges by maintaining a manageable skill library. This library can progressively integrate skill primitives as plug-and-play Low-Rank Adaptation (LoRA) modules in parameter-efficient finetuning, facilitating efficient and flexible skill expansion. This structure also enables the direct skill compositions in parameter space by merging LoRA modules that encode different skills, leveraging shared information across skills to effectively program new skills. Based on this, we propose a context-aware module to dynamically activate different skills to collaboratively handle new tasks. Empowering diverse applications including multi-objective composition, dynamics shift, and continual policy shift, the results on D4RL, DSRL benchmarks, and the DeepMind Control Suite show that PSEC exhibits superior capacity to leverage prior knowledge to efficiently tackle new challenges, as well as expand its skill libraries to evolve the capabilities. Project website: https://ltlhuuu.github.io/PSEC/. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05932v1-abstract-full').style.display = 'none'; document.getElementById('2502.05932v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICLR 2025, 37 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05855">arXiv:2502.05855</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.05855">pdf</a>, <a href="https://arxiv.org/format/2502.05855">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> DexVLA: Vision-Language Model with Plug-In Diffusion Expert for General Robot Control </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wen%2C+J">Junjie Wen</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+Y">Yichen Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jinming Li</a>, <a href="/search/cs?searchtype=author&amp;query=Tang%2C+Z">Zhibin Tang</a>, <a href="/search/cs?searchtype=author&amp;query=Shen%2C+C">Chaomin Shen</a>, <a href="/search/cs?searchtype=author&amp;query=Feng%2C+F">Feifei Feng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05855v1-abstract-short" style="display: inline;"> Enabling robots to perform diverse tasks across varied environments is a central challenge in robot learning. While vision-language-action (VLA) models have shown promise for generalizable robot skills, realizing their full potential requires addressing limitations in action representation and efficient training. Current VLA models often focus on scaling the vision-language model (VLM) component,&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05855v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05855v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05855v1-abstract-full" style="display: none;"> Enabling robots to perform diverse tasks across varied environments is a central challenge in robot learning. While vision-language-action (VLA) models have shown promise for generalizable robot skills, realizing their full potential requires addressing limitations in action representation and efficient training. Current VLA models often focus on scaling the vision-language model (VLM) component, while the action space representation remains a critical bottleneck. This paper introduces DexVLA, a novel framework designed to enhance the efficiency and generalization capabilities of VLAs for complex, long-horizon tasks across diverse robot embodiments. DexVLA features a novel diffusion-based action expert, scaled to one billion parameters, designed for cross-embodiment learning. A novel embodiment curriculum learning strategy facilitates efficient training: (1) pre-training the diffusion expert that is separable from the VLA on cross-embodiment data, (2) aligning the VLA model to specific embodiments, and (3) post-training for rapid adaptation to new tasks. We conduct comprehensive experiments across multiple embodiments, including single-arm, bimanual, and dexterous hand, demonstrating DexVLA&#39;s adaptability to challenging tasks without task-specific adaptation, its ability to learn dexterous skills on novel embodiments with limited data, and its capacity to complete complex, long-horizon tasks using only direct language prompting, such as laundry folding. In all settings, our method demonstrates superior performance compared to state-of-the-art models like Octo, OpenVLA, and Diffusion Policy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05855v1-abstract-full').style.display = 'none'; document.getElementById('2502.05855v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The webpage is at https://dex-vla.github.io/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05824">arXiv:2502.05824</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.05824">pdf</a>, <a href="https://arxiv.org/format/2502.05824">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> </div> </div> <p class="title is-5 mathjax"> Aerial Reliable Collaborative Communications for Terrestrial Mobile Users via Evolutionary Multi-Objective Deep Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Sun%2C+G">Geng Sun</a>, <a href="/search/cs?searchtype=author&amp;query=Xiao%2C+J">Jian Xiao</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jiahui Li</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+J">Jiacheng Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Kang%2C+J">Jiawen Kang</a>, <a href="/search/cs?searchtype=author&amp;query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/cs?searchtype=author&amp;query=Mao%2C+S">Shiwen Mao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05824v1-abstract-short" style="display: inline;"> Unmanned aerial vehicles (UAVs) have emerged as the potential aerial base stations (BSs) to improve terrestrial communications. However, the limited onboard energy and antenna power of a UAV restrict its communication range and transmission capability. To address these limitations, this work employs collaborative beamforming through a UAV-enabled virtual antenna array to improve transmission perfo&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05824v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05824v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05824v1-abstract-full" style="display: none;"> Unmanned aerial vehicles (UAVs) have emerged as the potential aerial base stations (BSs) to improve terrestrial communications. However, the limited onboard energy and antenna power of a UAV restrict its communication range and transmission capability. To address these limitations, this work employs collaborative beamforming through a UAV-enabled virtual antenna array to improve transmission performance from the UAV to terrestrial mobile users, under interference from non-associated BSs and dynamic channel conditions. Specifically, we introduce a memory-based random walk model to more accurately depict the mobility patterns of terrestrial mobile users. Following this, we formulate a multi-objective optimization problem (MOP) focused on maximizing the transmission rate while minimizing the flight energy consumption of the UAV swarm. Given the NP-hard nature of the formulated MOP and the highly dynamic environment, we transform this problem into a multi-objective Markov decision process and propose an improved evolutionary multi-objective reinforcement learning algorithm. Specifically, this algorithm introduces an evolutionary learning approach to obtain the approximate Pareto set for the formulated MOP. Moreover, the algorithm incorporates a long short-term memory network and hyper-sphere-based task selection method to discern the movement patterns of terrestrial mobile users and improve the diversity of the obtained Pareto set. Simulation results demonstrate that the proposed method effectively generates a diverse range of non-dominated policies and outperforms existing methods. Additional simulations demonstrate the scalability and robustness of the proposed CB-based method under different system parameters and various unexpected circumstances. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05824v1-abstract-full').style.display = 'none'; document.getElementById('2502.05824v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05773">arXiv:2502.05773</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.05773">pdf</a>, <a href="https://arxiv.org/format/2502.05773">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> PIPA: Preference Alignment as Prior-Informed Statistical Estimation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Junbo Li</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Z">Zhangyang Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Q">Qiang Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05773v1-abstract-short" style="display: inline;"> Offline preference alignment for language models such as Direct Preference Optimization (DPO) is favored for its effectiveness and simplicity, eliminating the need for costly reinforcement learning. Various offline algorithms have been developed for different data settings, yet they lack a unified understanding. In this study, we introduce Pior-Informed Preference Alignment (PIPA), a unified, RL&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05773v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05773v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05773v1-abstract-full" style="display: none;"> Offline preference alignment for language models such as Direct Preference Optimization (DPO) is favored for its effectiveness and simplicity, eliminating the need for costly reinforcement learning. Various offline algorithms have been developed for different data settings, yet they lack a unified understanding. In this study, we introduce Pior-Informed Preference Alignment (PIPA), a unified, RL-free probabilistic framework that formulates language model preference alignment as a Maximum Likelihood Estimation (MLE) problem with prior constraints. This method effectively accommodates both paired and unpaired data, as well as answer and step-level annotations. We illustrate that DPO and KTO are special cases with different prior constraints within our framework. By integrating different types of prior information, we developed two variations of PIPA: PIPA-M and PIPA-N. Both algorithms demonstrate a $3\sim10\%$ performance enhancement on the GSM8K and MATH benchmarks across all configurations, achieving these gains without additional training or computational costs compared to existing algorithms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05773v1-abstract-full').style.display = 'none'; document.getElementById('2502.05773v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05769">arXiv:2502.05769</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.05769">pdf</a>, <a href="https://arxiv.org/format/2502.05769">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Digital Twin Buildings: 3D Modeling, GIS Integration, and Visual Descriptions Using Gaussian Splatting, ChatGPT/Deepseek, and Google Maps Platform </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gao%2C+K">Kyle Gao</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+D">Dening Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+L">Liangzhi Li</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+N">Nan Chen</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+H">Hongjie He</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+L">Linlin Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jonathan Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05769v2-abstract-short" style="display: inline;"> Urban digital twins are virtual replicas of cities that use multi-source data and data analytics to optimize urban planning, infrastructure management, and decision-making. Towards this, we propose a framework focused on the single-building scale. By connecting to cloud mapping platforms such as Google Map Platforms APIs, by leveraging state-of-the-art multi-agent Large Language Models data analys&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05769v2-abstract-full').style.display = 'inline'; document.getElementById('2502.05769v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05769v2-abstract-full" style="display: none;"> Urban digital twins are virtual replicas of cities that use multi-source data and data analytics to optimize urban planning, infrastructure management, and decision-making. Towards this, we propose a framework focused on the single-building scale. By connecting to cloud mapping platforms such as Google Map Platforms APIs, by leveraging state-of-the-art multi-agent Large Language Models data analysis using ChatGPT(4o) and Deepseek-V3/R1, and by using our Gaussian Splatting-based mesh extraction pipeline, our Digital Twin Buildings framework can retrieve a building&#39;s 3D model, visual descriptions, and achieve cloud-based mapping integration with large language model-based data analytics using a building&#39;s address, postal code, or geographic coordinates. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05769v2-abstract-full').style.display = 'none'; document.getElementById('2502.05769v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">-Fixed minor typo</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05763">arXiv:2502.05763</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.05763">pdf</a>, <a href="https://arxiv.org/format/2502.05763">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Public DNS Resolvers Meet Content Delivery Networks: A Performance Assessment of the Interplay </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kernan%2C+N">Nicholas Kernan</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Joey Li</a>, <a href="/search/cs?searchtype=author&amp;query=Al-Dalky%2C+R">Rami Al-Dalky</a>, <a href="/search/cs?searchtype=author&amp;query=Rabinovich%2C+M">Michael Rabinovich</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05763v1-abstract-short" style="display: inline;"> This paper investigates two key performance aspects of the interplay between public DNS resolution services and content delivery networks -- the latency of DNS queries for resolving CDN-accelerated hostnames and the latency between the end-user and the CDN&#39;s edge server obtained by the user through a given resolution service. While these important issues have been considered in the past, significa&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05763v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05763v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05763v1-abstract-full" style="display: none;"> This paper investigates two key performance aspects of the interplay between public DNS resolution services and content delivery networks -- the latency of DNS queries for resolving CDN-accelerated hostnames and the latency between the end-user and the CDN&#39;s edge server obtained by the user through a given resolution service. While these important issues have been considered in the past, significant developments, such as the IPv6 finally getting traction, the adoption of the ECS extension to DNS by major DNS resolution services, and the embracing of anycast by some CDNs warrant a reassessment under these new realities. Among the resolution services we consider, We find Google DNS and OpenDNS to lag behind the Cloudflare resolver and, for some CDNs, Quad9 in terms of DNS latency, and trace the cause to drastically lower cache hit rates. At the same time, we find that Google and OpenDNS have largely closed the gap with ISP resolvers in the quality of CDNs&#39;client-to-edge-server mappings as measured by latency, while the Cloudflare resolver still shows some penalty with Akamai, and Quad9 exhibits a noticeable penalty with three of the four CDNs in the study, keeping up only for Cloudflare CDN that does not use DNS to map clients to servers. Finally, in several locations, we observe IPv6 penalty in the latency of client-to-CDN-edge-server mappings produced by the resolvers. Moreover, this penalty does not rise above typical thresholds employed by the Happy Eyeballs algorithm for falling back to IPv4 communication. Thus, dual-stacked clients in these locations may experience suboptimal performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05763v1-abstract-full').style.display = 'none'; document.getElementById('2502.05763v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05547">arXiv:2502.05547</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.05547">pdf</a>, <a href="https://arxiv.org/format/2502.05547">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Dual Defense: Enhancing Privacy and Mitigating Poisoning Attacks in Federated Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xu%2C+R">Runhua Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Gao%2C+S">Shiqi Gao</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+C">Chao Li</a>, <a href="/search/cs?searchtype=author&amp;query=Joshi%2C+J">James Joshi</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jianxin Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05547v1-abstract-short" style="display: inline;"> Federated learning (FL) is inherently susceptible to privacy breaches and poisoning attacks. To tackle these challenges, researchers have separately devised secure aggregation mechanisms to protect data privacy and robust aggregation methods that withstand poisoning attacks. However, simultaneously addressing both concerns is challenging; secure aggregation facilitates poisoning attacks as most an&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05547v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05547v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05547v1-abstract-full" style="display: none;"> Federated learning (FL) is inherently susceptible to privacy breaches and poisoning attacks. To tackle these challenges, researchers have separately devised secure aggregation mechanisms to protect data privacy and robust aggregation methods that withstand poisoning attacks. However, simultaneously addressing both concerns is challenging; secure aggregation facilitates poisoning attacks as most anomaly detection techniques require access to unencrypted local model updates, which are obscured by secure aggregation. Few recent efforts to simultaneously tackle both challenges offen depend on impractical assumption of non-colluding two-server setups that disrupt FL&#39;s topology, or three-party computation which introduces scalability issues, complicating deployment and application. To overcome this dilemma, this paper introduce a Dual Defense Federated learning (DDFed) framework. DDFed simultaneously boosts privacy protection and mitigates poisoning attacks, without introducing new participant roles or disrupting the existing FL topology. DDFed initially leverages cutting-edge fully homomorphic encryption (FHE) to securely aggregate model updates, without the impractical requirement for non-colluding two-server setups and ensures strong privacy protection. Additionally, we proposes a unique two-phase anomaly detection mechanism for encrypted model updates, featuring secure similarity computation and feedback-driven collaborative selection, with additional measures to prevent potential privacy breaches from Byzantine clients incorporated into the detection process. We conducted extensive experiments on various model poisoning attacks and FL scenarios, including both cross-device and cross-silo FL. Experiments on publicly available datasets demonstrate that DDFed successfully protects model privacy and effectively defends against model poisoning threats. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05547v1-abstract-full').style.display = 'none'; document.getElementById('2502.05547v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">accepted by The Thirty-eighth Annual Conference on Neural Information Processing Systems (NeurIPS 2024)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05236">arXiv:2502.05236</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.05236">pdf</a>, <a href="https://arxiv.org/format/2502.05236">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Koel-TTS: Enhancing LLM based Speech Generation with Preference Alignment and Classifier Free Guidance </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Hussain%2C+S">Shehzeen Hussain</a>, <a href="/search/cs?searchtype=author&amp;query=Neekhara%2C+P">Paarth Neekhara</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+X">Xuesong Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Casanova%2C+E">Edresson Casanova</a>, <a href="/search/cs?searchtype=author&amp;query=Ghosh%2C+S">Subhankar Ghosh</a>, <a href="/search/cs?searchtype=author&amp;query=Desta%2C+M+T">Mikyas T. Desta</a>, <a href="/search/cs?searchtype=author&amp;query=Fejgin%2C+R">Roy Fejgin</a>, <a href="/search/cs?searchtype=author&amp;query=Valle%2C+R">Rafael Valle</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jason Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05236v1-abstract-short" style="display: inline;"> While autoregressive speech token generation models produce speech with remarkable variety and naturalness, their inherent lack of controllability often results in issues such as hallucinations and undesired vocalizations that do not conform to conditioning inputs. We introduce Koel-TTS, a suite of enhanced encoder-decoder Transformer TTS models that address these challenges by incorporating prefe&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05236v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05236v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05236v1-abstract-full" style="display: none;"> While autoregressive speech token generation models produce speech with remarkable variety and naturalness, their inherent lack of controllability often results in issues such as hallucinations and undesired vocalizations that do not conform to conditioning inputs. We introduce Koel-TTS, a suite of enhanced encoder-decoder Transformer TTS models that address these challenges by incorporating preference alignment techniques guided by automatic speech recognition and speaker verification models. Additionally, we incorporate classifier-free guidance to further improve synthesis adherence to the transcript and reference speaker audio. Our experiments demonstrate that these optimizations significantly enhance target speaker similarity, intelligibility, and naturalness of synthesized speech. Notably, Koel-TTS directly maps text and context audio to acoustic tokens, and on the aforementioned metrics, outperforms state-of-the-art TTS models, despite being trained on a significantly smaller dataset. Audio samples and demos are available on our website. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05236v1-abstract-full').style.display = 'none'; document.getElementById('2502.05236v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05218">arXiv:2502.05218</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.05218">pdf</a>, <a href="https://arxiv.org/format/2502.05218">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Statistical Finance">q-fin.ST</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> FactorGCL: A Hypergraph-Based Factor Model with Temporal Residual Contrastive Learning for Stock Returns Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Duan%2C+Y">Yitong Duan</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+W">Weiran Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jian Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05218v1-abstract-short" style="display: inline;"> As a fundamental method in economics and finance, the factor model has been extensively utilized in quantitative investment. In recent years, there has been a paradigm shift from traditional linear models with expert-designed factors to more flexible nonlinear machine learning-based models with data-driven factors, aiming to enhance the effectiveness of these factor models. However, due to the low&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05218v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05218v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05218v1-abstract-full" style="display: none;"> As a fundamental method in economics and finance, the factor model has been extensively utilized in quantitative investment. In recent years, there has been a paradigm shift from traditional linear models with expert-designed factors to more flexible nonlinear machine learning-based models with data-driven factors, aiming to enhance the effectiveness of these factor models. However, due to the low signal-to-noise ratio in market data, mining effective factors in data-driven models remains challenging. In this work, we propose a hypergraph-based factor model with temporal residual contrastive learning (FactorGCL) that employs a hypergraph structure to better capture high-order nonlinear relationships among stock returns and factors. To mine hidden factors that supplement human-designed prior factors for predicting stock returns, we design a cascading residual hypergraph architecture, in which the hidden factors are extracted from the residual information after removing the influence of prior factors. Additionally, we propose a temporal residual contrastive learning method to guide the extraction of effective and comprehensive hidden factors by contrasting stock-specific residual information over different time periods. Our extensive experiments on real stock market data demonstrate that FactorGCL not only outperforms existing state-of-the-art methods but also mines effective hidden factors for predicting stock returns. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05218v1-abstract-full').style.display = 'none'; document.getElementById('2502.05218v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Li%2C+J&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Li%2C+J&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Li%2C+J&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Li%2C+J&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Li%2C+J&amp;start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Li%2C+J&amp;start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">&hellip;</span></li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10