Search | arXiv e-print repository

<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 827 results for author: <span class="mathjax">Cheng, J</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Cheng%2C+J">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Cheng, J"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Cheng%2C+J&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Cheng, J"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Cheng%2C+J&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Cheng%2C+J&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Cheng%2C+J&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Cheng%2C+J&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Cheng%2C+J&amp;start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Cheng%2C+J&amp;start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">&hellip;</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.20491">arXiv:2503.20491</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2503.20491">pdf</a>, <a href="https://arxiv.org/format/2503.20491">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> VPO: Aligning Text-to-Video Generation Models with Prompt Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+J">Jiale Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Lyu%2C+R">Ruiliang Lyu</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+X">Xiaotao Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+X">Xiao Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+J">Jiazheng Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+Y">Yida Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Teng%2C+J">Jiayan Teng</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+Z">Zhuoyi Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Dong%2C+Y">Yuxiao Dong</a>, <a href="/search/cs?searchtype=author&amp;query=Tang%2C+J">Jie Tang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+H">Hongning Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+M">Minlie Huang</a> 
</p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.20491v1-abstract-short" style="display: inline;"> Video generation models have achieved remarkable progress in text-to-video tasks. These models are typically trained on text-video pairs with highly detailed and carefully crafted descriptions, while real-world user inputs during inference are often concise, vague, or poorly structured. This gap makes prompt optimization crucial for generating high-quality videos. Current methods often rely on lar&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.20491v1-abstract-full').style.display = 'inline'; document.getElementById('2503.20491v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.20491v1-abstract-full" style="display: none;"> Video generation models have achieved remarkable progress in text-to-video tasks. These models are typically trained on text-video pairs with highly detailed and carefully crafted descriptions, while real-world user inputs during inference are often concise, vague, or poorly structured. This gap makes prompt optimization crucial for generating high-quality videos. Current methods often rely on large language models (LLMs) to refine prompts through in-context learning, but suffer from several limitations: they may distort user intent, omit critical details, or introduce safety risks. Moreover, they optimize prompts without considering the impact on the final video quality, which can lead to suboptimal results. To address these issues, we introduce VPO, a principled framework that optimizes prompts based on three core principles: harmlessness, accuracy, and helpfulness. The generated prompts faithfully preserve user intents and, more importantly, enhance the safety and quality of generated videos. To achieve this, VPO employs a two-stage optimization approach. First, we construct and refine a supervised fine-tuning (SFT) dataset based on principles of safety and alignment. Second, we introduce both text-level and video-level feedback to further optimize the SFT model with preference learning. Our extensive experiments demonstrate that VPO significantly improves safety, alignment, and video quality compared to baseline methods. Moreover, VPO shows strong generalization across video generation models. Furthermore, we demonstrate that VPO could outperform and be combined with RLHF methods on video generation models, underscoring the effectiveness of VPO in aligning video generation models. Our code and data are publicly available at https://github.com/thu-coai/VPO. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.20491v1-abstract-full').style.display = 'none'; document.getElementById('2503.20491v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.20176">arXiv:2503.20176</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2503.20176">pdf</a>, <a href="https://arxiv.org/format/2503.20176">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Offline Reinforcement Learning with Discrete Diffusion Skills </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Qiao%2C+R">RuiXi Qiao</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+J">Jie Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Dai%2C+X">Xingyuan Dai</a>, <a href="/search/cs?searchtype=author&amp;query=Tian%2C+Y">Yonglin Tian</a>, <a href="/search/cs?searchtype=author&amp;query=Lv%2C+Y">Yisheng Lv</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.20176v1-abstract-short" style="display: inline;"> Skills have been introduced to offline reinforcement learning (RL) as temporal abstractions to tackle complex, long-horizon tasks, promoting consistent behavior and enabling meaningful exploration. While skills in offline RL are predominantly modeled within a continuous latent space, the potential of discrete skill spaces remains largely underexplored. In this paper, we propose a compact discrete&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.20176v1-abstract-full').style.display = 'inline'; document.getElementById('2503.20176v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.20176v1-abstract-full" style="display: none;"> Skills have been introduced to offline reinforcement learning (RL) as temporal abstractions to tackle complex, long-horizon tasks, promoting consistent behavior and enabling meaningful exploration. While skills in offline RL are predominantly modeled within a continuous latent space, the potential of discrete skill spaces remains largely underexplored. In this paper, we propose a compact discrete skill space for offline RL tasks supported by state-of-the-art transformer-based encoder and diffusion-based decoder. Coupled with a high-level policy trained via offline RL techniques, our method establishes a hierarchical RL framework where the trained diffusion decoder plays a pivotal role. Empirical evaluations show that the proposed algorithm, Discrete Diffusion Skill (DDS), is a powerful offline RL method. DDS performs competitively on Locomotion and Kitchen tasks and excels on long-horizon tasks, achieving at least a 12 percent improvement on AntMaze-v2 benchmarks compared to existing offline RL approaches. Furthermore, DDS offers improved interpretability, training stability, and online exploration compared to previous skill-based methods. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.20176v1-abstract-full').style.display = 'none'; document.getElementById('2503.20176v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.19260">arXiv:2503.19260</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2503.19260">pdf</a>, <a href="https://arxiv.org/format/2503.19260">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Linguistic Blind Spots of Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+J">Jiali Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Amiri%2C+H">Hadi Amiri</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.19260v1-abstract-short" style="display: inline;"> Large language models (LLMs) are the foundation of many AI applications today. However, despite their remarkable proficiency in generating coherent text, questions linger regarding their ability to perform fine-grained linguistic annotation tasks, such as detecting nouns or verbs, or identifying more complex syntactic structures like clauses in input texts. These tasks require precise syntactic an&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.19260v1-abstract-full').style.display = 'inline'; document.getElementById('2503.19260v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.19260v1-abstract-full" style="display: none;"> Large language models (LLMs) are the foundation of many AI applications today. However, despite their remarkable proficiency in generating coherent text, questions linger regarding their ability to perform fine-grained linguistic annotation tasks, such as detecting nouns or verbs, or identifying more complex syntactic structures like clauses in input texts. These tasks require precise syntactic and semantic understanding of input text, and when LLMs underperform on specific linguistic structures, it raises concerns about their reliability for detailed linguistic analysis and whether their (even correct) outputs truly reflect an understanding of the inputs. In this paper, we empirically study the performance of recent LLMs on fine-grained linguistic annotation tasks. Through a series of experiments, we find that recent LLMs show limited efficacy in addressing linguistic queries and often struggle with linguistically complex inputs. 
We show that the most capable LLM (Llama3-70b) makes notable errors in detecting linguistic structures, such as misidentifying embedded clauses, failing to recognize verb phrases, and confusing complex nominals with clauses. Our results provide insights to inform future advancements in LLM design and development. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.19260v1-abstract-full').style.display = 'none'; document.getElementById('2503.19260v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NAACL 2025 Cognitive Modeling and Computational Linguistics Workshop</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> NAACL 2025 CMCL Workshop </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.18792">arXiv:2503.18792</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2503.18792">pdf</a>, <a href="https://arxiv.org/format/2503.18792">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> REALM: A Dataset of Real-World LLM Use Cases </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+J">Jingwen Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Ghate%2C+K">Kshitish Ghate</a>, <a href="/search/cs?searchtype=author&amp;query=Hua%2C+W">Wenyue Hua</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+W+Y">William Yang Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Shen%2C+H">Hong Shen</a>, <a href="/search/cs?searchtype=author&amp;query=Fang%2C+F">Fei Fang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.18792v1-abstract-short" style="display: inline;"> Large Language Models, such as the GPT series, have driven significant industrial applications, leading to economic and societal transformations. However, a comprehensive understanding of their real-world applications remains limited. To address this, we introduce REALM, a dataset of over 94,000 LLM use cases collected from Reddit and news articles. 
REALM captures two key dimensions: the diverse a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.18792v1-abstract-full').style.display = 'inline'; document.getElementById('2503.18792v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.18792v1-abstract-full" style="display: none;"> Large Language Models, such as the GPT series, have driven significant industrial applications, leading to economic and societal transformations. However, a comprehensive understanding of their real-world applications remains limited. To address this, we introduce REALM, a dataset of over 94,000 LLM use cases collected from Reddit and news articles. REALM captures two key dimensions: the diverse applications of LLMs and the demographics of their users. It categorizes LLM applications and explores how users&#39; occupations relate to the types of applications they use. By integrating real-world data, REALM offers insights into LLM adoption across different domains, providing a foundation for future research on their evolving societal roles. A dedicated dashboard https://realm-e7682.web.app/ presents the data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.18792v1-abstract-full').style.display = 'none'; document.getElementById('2503.18792v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.18773">arXiv:2503.18773</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2503.18773">pdf</a>, <a href="https://arxiv.org/format/2503.18773">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Performance">cs.PF</span> </div> </div> <p class="title is-5 mathjax"> BitDecoding: Unlocking Tensor Cores for Long-Context LLMs Decoding with Low-Bit KV Cache </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Du%2C+D">Dayou Du</a>, <a href="/search/cs?searchtype=author&amp;query=Cao%2C+S">Shijie Cao</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+J">Jianyi Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Cao%2C+T">Ting Cao</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+M">Mao Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.18773v1-abstract-short" style="display: inline;"> The growing adoption of long-context Large Language Models (LLMs) has introduced significant memory and computational 
Abstract: The growing adoption of long-context Large Language Models (LLMs) has introduced significant memory and computational challenges in autoregressive decoding due to the expanding Key-Value (KV) cache. KV cache quantization has emerged as a promising solution, with prior work showing that 4-bit or even 2-bit quantization can maintain model accuracy while reducing memory costs. However, despite these benefits, preliminary implementations for the low-bit KV cache struggle to deliver the expected speedup due to quantization and dequantization overheads and the lack of Tensor Cores utilization. In this work, we propose BitDecoding, a GPU-optimized framework that unlocks Tensor Cores for efficient decoding with low-bit KV cache. Efficiently leveraging Tensor Cores for low-bit KV cache is challenging due to the dynamic nature of KV cache generation at each decoding step. BitDecoding addresses these challenges with a Tensor Cores-Centric BitFusion Scheme that ensures data layout compatibility to enable high utilization of Tensor Cores. Additionally, BitDecoding incorporates a warp-efficient parallel decoding kernel and a fine-grained asynchronous pipeline, minimizing dequantization overhead and improving computational efficiency. Experiments show that BitDecoding achieves up to 7.5x speedup on RTX 4090, 4.8x on A100, and 8.9x on H100, compared to FP16 FlashDecoding-v2. It also outperforms the state-of-the-art low-bit KV cache implementation (QServe) by up to 4.3x. On LLaMA-3.1-8B with a 128K sequence length, BitDecoding reduces single-batch decoding latency by 3x, demonstrating its effectiveness in long-context generation scenarios. The code is available at https://github.com/DD-DuDa/BitDecoding.
Submitted 24 March, 2025; originally announced March 2025.
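
The quantization and dequantization overhead the BitDecoding abstract highlights is easy to see in a minimal sketch. The code below is purely illustrative: per-group asymmetric 4-bit packing of a KV-cache tensor, with the group size and function names as assumptions; the paper's contribution is precisely to hide this round trip inside Tensor-Core-friendly kernels.

```python
# Illustrative sketch only: per-group asymmetric 4-bit quantization of a
# KV-cache tensor, showing the pack/unpack round trip paid while decoding.
import torch

def quantize_kv_4bit(x: torch.Tensor, group: int = 64):
    """x: float tensor whose element count is divisible by `group`."""
    xg = x.float().reshape(-1, group)
    lo = xg.min(dim=-1, keepdim=True).values
    hi = xg.max(dim=-1, keepdim=True).values
    scale = (hi - lo).clamp(min=1e-6) / 15.0        # 4 bits -> 16 levels
    q = ((xg - lo) / scale).round().clamp(0, 15).to(torch.uint8)
    packed = (q[:, 0::2] << 4) | q[:, 1::2]         # two nibbles per byte
    return packed, scale, lo

def dequantize_kv_4bit(packed, scale, lo, group: int = 64):
    q = torch.empty(packed.shape[0], group, dtype=torch.uint8, device=packed.device)
    q[:, 0::2] = packed >> 4
    q[:, 1::2] = packed & 0xF
    return q.float() * scale + lo                   # caller reshapes to x's shape
```

Memory shrinks roughly 4x versus FP16, but every decoding step now pays the unpack cost, which is why data layouts that keep Tensor Cores busy matter.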

6. arXiv:2503.18197 [pdf, other] (cs.LG, cs.AI)
FROG: Fair Removal on Graphs
Authors: Ziheng Chen, Jiali Cheng, Gabriele Tolomei, Sijia Liu, Hadi Amiri, Yu Wang, Kaushiki Nag, Lu Lin
Abstract: As compliance with privacy regulations becomes increasingly critical, the growing demand for data privacy has highlighted the significance of machine unlearning in many real-world applications, such as social network and recommender systems, many of which can be represented as graph-structured data. However, existing graph unlearning algorithms indiscriminately modify edges or nodes from well-trained models without considering the potential impact of such structural modifications on fairness. For example, forgetting links between nodes with different genders in a social network may exacerbate group disparities, leading to significant fairness concerns. To address these challenges, we propose a novel approach that jointly optimizes the graph structure and the corresponding model for fair unlearning tasks. Specifically, our approach rewires the graph to enhance unlearning efficiency by removing redundant edges that hinder forgetting while preserving fairness through targeted edge augmentation. Additionally, we introduce a worst-case evaluation mechanism to assess the reliability of fair unlearning performance. Extensive experiments on real-world datasets demonstrate the effectiveness of the proposed approach in achieving superior unlearning outcomes.
Submitted 23 March, 2025; originally announced March 2025.

7. arXiv:2503.17632 [pdf, other] (cs.LG, cs.AI, cs.CL)
FairFlow: Mitigating Dataset Biases through Undecided Learning
Authors: Jiali Cheng, Hadi Amiri
Abstract: Language models are prone to dataset biases, known as shortcuts and spurious correlations in data, which often result in performance drops on new data. We present a new debiasing framework called "FairFlow" that mitigates dataset biases by learning to be undecided in its predictions for data samples or representations associated with known or unknown biases. The framework introduces two key components: a suite of data and model perturbation operations that generate different biased views of input samples, and a contrastive objective that learns debiased and robust representations from the resulting biased views of samples.
Experiments show that FairFlow outperforms existing debiasing methods, particularly on out-of-domain and hard test samples, without compromising in-domain performance.
Submitted 21 March, 2025; originally announced March 2025.
Comments: EMNLP 2024
Journal ref: EMNLP 2024
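
The contrastive objective in the FairFlow abstract can be pictured with a generic InfoNCE-style loss over two "biased views" of the same batch; this is a sketch under that assumption, not the paper's exact formulation.

```python
# Illustrative sketch only: InfoNCE-style contrastive loss where the two views
# of each sample come from different bias-inducing perturbations.
import torch
import torch.nn.functional as F

def contrastive_loss(z1: torch.Tensor, z2: torch.Tensor, tau: float = 0.1):
    """z1, z2: (batch, dim) encodings of two biased views of the same inputs."""
    z1 = F.normalize(z1, dim=-1)
    z2 = F.normalize(z2, dim=-1)
    logits = (z1 @ z2.T) / tau                 # cosine similarity matrix
    targets = torch.arange(z1.size(0), device=z1.device)
    return F.cross_entropy(logits, targets)    # diagonal pairs are positives
```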

8. arXiv:2503.13716 [pdf, other] (cs.RO, eess.SY)
16 Ways to Gallop: Energetics and Body Dynamics of High-Speed Quadrupedal Gaits
Authors: Yasser G. Alqaham, Jing Cheng, Zhenyu Gan
Abstract: Galloping is a common high-speed gait in both animals and quadrupedal robots, yet its energetic characteristics remain insufficiently explored. This study systematically analyzes a large number of possible galloping gaits by categorizing them based on the number of flight phases per stride and the phase relationships between the front and rear legs, following Hildebrand's framework for asymmetrical gaits. Using the A1 quadrupedal robot from Unitree, we model galloping dynamics as a hybrid dynamical system and employ trajectory optimization (TO) to minimize the cost of transport (CoT) across a range of speeds. Our results reveal that rotary and transverse gallop footfall sequences exhibit no fundamental energetic difference, despite variations in body yaw and roll motion. However, the number of flight phases significantly impacts energy efficiency: galloping with no flight phases is optimal at lower speeds, whereas galloping with two flight phases minimizes energy consumption at higher speeds. We validate these findings using a quadratic programming (QP)-based controller, developed in our previous work, in Gazebo simulations. These insights advance the understanding of quadrupedal locomotion energetics and may inform future legged robot designs for adaptive, energy-efficient gait transitions.
Submitted 17 March, 2025; originally announced March 2025.
Comments: 7 pages, 6 figures
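
The cost of transport (CoT) that the trajectory optimization above minimizes is conventionally the dimensionless ratio of energy expended to body weight times distance traveled; a small helper makes the units explicit (illustrative, not the paper's code).

```python
# Dimensionless cost of transport: energy / (weight * distance).
def cost_of_transport(energy_j: float, mass_kg: float,
                      distance_m: float, g: float = 9.81) -> float:
    return energy_j / (mass_kg * g * distance_m)
```

Lower CoT at a given speed means a more economical gait, which is how the paper compares footfall sequences and flight-phase counts.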

9. arXiv:2503.12460 [pdf, other] (cs.CV)
Exploring Contextual Attribute Density in Referring Expression Counting
Authors: Zhicheng Wang, Zhiyu Pan, Zhan Peng, Jian Cheng, Liwen Xiao, Wei Jiang, Zhiguo Cao
Abstract: Referring expression counting (REC) algorithms aim to provide more flexible and interactive counting across varied fine-grained text expressions. However, the requirement for fine-grained attribute understanding poses challenges for prior arts, as they struggle to accurately align attribute information with correct visual patterns. Given the proven importance of "visual density", it is presumed that the limitations of current REC approaches stem from an under-exploration of "contextual attribute density" (CAD). In the scope of REC, we define CAD as the measure of the information intensity of one certain fine-grained attribute in visual regions. To model the CAD, we propose a U-shape CAD estimator in which referring expression and multi-scale visual features from GroundingDINO can interact with each other. With additional density supervision, we can effectively encode CAD, which is subsequently decoded via a novel attention procedure with CAD-refined queries. Integrating all these contributions, our framework significantly outperforms state-of-the-art REC methods, achieving a 30% error reduction in counting metrics and a 10% improvement in localization accuracy. The surprising results shed light on the significance of contextual attribute density for REC. Code will be at github.com/Xu3XiWang/CAD-GD.
Submitted 16 March, 2025; originally announced March 2025.
Comments: CVPR25

10. arXiv:2503.10150 [pdf, other] (cs.CL, cs.AI)
Retrieval-Augmented Generation with Hierarchical Knowledge
Authors: Haoyu Huang, Yongfeng Huang, Junjie Yang, Zhenyu Pan, Yongqiang Chen, Kaili Ma, Hongzhi Chen, James Cheng
Abstract: Graph-based Retrieval-Augmented Generation (RAG) methods have significantly enhanced the performance of large language models (LLMs) in domain-specific tasks. However, existing RAG methods do not adequately utilize the naturally inherent hierarchical knowledge in human cognition, which limits the capabilities of RAG systems. In this paper, we introduce a new RAG approach, called HiRAG, which utilizes hierarchical knowledge to enhance the semantic understanding and structure-capturing capabilities of RAG systems in the indexing and retrieval processes. Our extensive experiments demonstrate that HiRAG achieves significant performance improvements over the state-of-the-art baseline methods. The code of our proposed method is available at https://github.com/hhy-huang/HiRAG.
Submitted 13 March, 2025; originally announced March 2025.

11. arXiv:2503.05734 [pdf] (cs.CY, cs.AI)
Modeling Behavior Change for Multi-model At-Risk Students Early Prediction (extended version)
Authors: Jiabei Cheng, Zhen-Qun Yang, Jiannong Cao, Yu Yang, Kai Cheung Franky Poon, Daniel Lai
Abstract: In the educational domain, identifying students at risk of dropping out is essential for allowing educators to intervene effectively, improving both academic outcomes and overall student well-being. Data in educational settings often originate from diverse sources, such as assignments, grades, and attendance records. However, most existing research relies on online learning data and extracts only quantitative features. While quantification eases processing, it also leads to a significant loss of original information. Moreover, current models primarily identify students with consistently poor performance through simple and discrete behavioural patterns, failing to capture the complex continuity and non-linear changes in student behaviour.
We have developed an innovative prediction model, Multimodal-ChangePoint Detection (MCPD), utilizing textual teacher remark data and numerical grade data from middle schools. Our model achieves a highly integrated and intelligent analysis by using independent encoders to process the two data types and fusing the encoded features. The model further refines its analysis by leveraging a changepoint detection module to pinpoint crucial behavioral changes, which are integrated as dynamic weights through a simple attention mechanism. Experimental validations indicate that our model achieves an accuracy range of 70-75%, outperforming baseline algorithms by approximately 5-10% on average. Additionally, our algorithm demonstrates a certain degree of transferability, maintaining high accuracy when adjusted and retrained with different definitions of at-risk, proving its broad applicability.
Submitted 19 February, 2025; originally announced March 2025.
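
The changepoint-detection idea in the MCPD abstract can be pictured with a minimal single-changepoint score over a one-dimensional grade series. The paper's module is multimodal and feeds detected changes into an attention mechanism as dynamic weights, so treat the following purely as an illustrative sketch.

```python
# Illustrative sketch only: pick the split of a grade series that most reduces
# squared error versus a single mean (a basic mean-shift changepoint test).
import numpy as np

def best_changepoint(series: np.ndarray):
    """Return (index, gain); gain is the SSE reduction from splitting there."""
    n = len(series)
    total_sse = float(((series - series.mean()) ** 2).sum())
    best_t, best_gain = None, 0.0
    for t in range(2, n - 1):                  # at least two points per side
        left, right = series[:t], series[t:]
        sse = ((left - left.mean()) ** 2).sum() + ((right - right.mean()) ** 2).sum()
        if total_sse - sse > best_gain:
            best_t, best_gain = t, float(total_sse - sse)
    return best_t, best_gain
```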
arXiv:2503.04037 (https://arxiv.org/abs/2503.04037) [pdf, other]
Subjects: cs.GR (Graphics); cs.CV (Computer Vision and Pattern Recognition)
Beyond Existance: Fulfill 3D Reconstructed Scenes with Pseudo Details
Authors: Yifei Gao, Jun Huang, Lei Wang, Ruiting Dai, Jun Cheng
Abstract: The emergence of 3D Gaussian Splatting (3D-GS) has significantly advanced 3D reconstruction by providing high fidelity and fast training speeds across various scenarios. While recent efforts have mainly focused on improving model structures to compress data volume or reduce artifacts during zoom-in and zoom-out operations, they often overlook an underlying issue: training sampling deficiency. In zoomed-in views, Gaussian primitives can appear unregulated and distorted due to their dilation limitations and the insufficient availability of scale-specific training samples. Consequently, incorporating pseudo-details that ensure the completeness and alignment of the scene becomes essential. In this paper, we introduce a new training method that integrates diffusion models and multi-scale training using pseudo-ground-truth data. This approach not only notably mitigates the dilation and zoomed-in artifacts but also enriches reconstructed scenes with precise details beyond the existing scenarios. Our method achieves state-of-the-art performance across various benchmarks and extends the capabilities of 3D reconstruction beyond training datasets.
Submitted 5 March, 2025; originally announced March 2025.
MSC Class: I.3.5

arXiv:2503.03259 (https://arxiv.org/abs/2503.03259) [pdf, other]
Subjects: cs.CV (Computer Vision and Pattern Recognition)
BANet: Bilateral Aggregation Network for Mobile Stereo Matching
Authors: Gangwei Xu, Jiaxin Liu, Xianqi Wang, Junda Cheng, Yong Deng, Jinliang Zang, Yurui Chen, Xin Yang
Abstract: State-of-the-art stereo matching methods typically use costly 3D convolutions to aggregate a full cost volume, but their computational demands make mobile deployment challenging. Directly applying 2D convolutions for cost aggregation often results in edge blurring, detail loss, and mismatches in textureless regions. Some complex operations, like deformable convolutions and iterative warping, can partially alleviate this issue; however, they are not mobile-friendly, limiting their deployment on mobile devices. In this paper, we present a novel bilateral aggregation network (BANet) for mobile stereo matching that produces high-quality results with sharp edges and fine details using only 2D convolutions. Specifically, we first separate the full cost volume into detailed and smooth volumes using a spatial attention map, then perform detailed and smooth aggregations accordingly, ultimately fusing both to obtain the final disparity map. Additionally, to accurately identify high-frequency detailed regions and low-frequency smooth/textureless regions, we propose a new scale-aware spatial attention module.
Experimental results demonstrate that our BANet-2D significantly outperforms other mobile-friendly methods, achieving 35.3% higher accuracy on the KITTI 2015 leaderboard than MobileStereoNet-2D, with faster runtime on mobile devices. The extended 3D version, BANet-3D, achieves the highest accuracy among all real-time methods on high-end GPUs. Code: https://github.com/gangweiX/BANet
Submitted 5 March, 2025; originally announced March 2025.
Comments: 12 pages
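The bilateral split described above (an attention map routing the cost volume into a detail branch and a smooth branch, each aggregated by plain 2D convolutions and then fused) can be sketched in a few lines of PyTorch. Layer widths, kernel sizes, and the attention stub below are assumptions for illustration, not the published BANet.

```python
# A minimal PyTorch sketch of bilateral cost-volume aggregation: a spatial
# attention map splits the volume into "detail" and "smooth" parts that are
# aggregated separately with 2D convs and fused.
import torch
import torch.nn as nn

class BilateralAggregation(nn.Module):
    def __init__(self, disp=48):
        super().__init__()
        self.attn = nn.Sequential(  # stand-in for scale-aware spatial attention
            nn.Conv2d(disp, 1, 3, padding=1), nn.Sigmoid())
        self.detail = nn.Conv2d(disp, disp, 3, padding=1)  # edge/detail branch
        self.smooth = nn.Conv2d(disp, disp, 7, padding=3)  # large-context branch
        self.fuse = nn.Conv2d(2 * disp, disp, 1)

    def forward(self, cost):                 # cost: (B, disp, H, W)
        a = self.attn(cost)                  # (B, 1, H, W) in [0, 1]
        d = self.detail(a * cost)            # aggregate high-frequency regions
        s = self.smooth((1 - a) * cost)      # aggregate smooth/textureless regions
        return self.fuse(torch.cat([d, s], dim=1))

cost = torch.randn(1, 48, 64, 128)           # toy full cost volume
agg = BilateralAggregation()(cost)
probs = agg.softmax(dim=1)                   # per-pixel disparity distribution
disps = torch.arange(48.0).view(1, -1, 1, 1)
disparity = (probs * disps).sum(dim=1)       # soft-argmax disparity map
print(disparity.shape)                       # (1, 64, 128)
```

The soft-argmax at the end is one common way to turn an aggregated cost volume into a disparity map; the paper may use a different readout.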
arXiv:2503.00392 (https://arxiv.org/abs/2503.00392) [pdf, other]
Subjects: cs.LG (Machine Learning); cs.AI (Artificial Intelligence)
Progressive Sparse Attention: Algorithm and System Co-design for Efficient Attention in LLM Serving
Authors: Qihui Zhou, Peiqi Yin, Pengfei Zuo, James Cheng
Abstract: Processing long contexts has become a critical capability for modern large language models (LLMs). However, serving long-context LLMs comes with significant inference costs due to the high memory overhead of the key-value (KV) cache. Existing work leverages dynamic sparse attention algorithms (DSAes) to mitigate the KV cache overhead, but these algorithms rely on top-k KV cache selection, which results in a trade-off between accuracy and efficiency. A larger k improves accuracy but decreases efficiency, while a smaller k boosts efficiency but compromises accuracy. To overcome this trade-off, this paper presents PSA, a Progressive Sparse Attention mechanism that integrates algorithmic innovations with system co-design to achieve both high inference accuracy and improved efficiency in LLM serving. The PSA algorithm adaptively adjusts the KV cache budget of different tokens and layers according to their real attention weight distributions, rather than relying on a fixed budget k. This enables high accuracy while minimizing KV cache usage. To further enhance execution efficiency, we introduce a pipelined iteration scheme that reduces CPU-GPU interleaving and synchronization overhead during PSA computation. Additionally, we implement unified GPU memory management that optimizes PSA's memory utilization by accounting for uneven memory requirements across different model layers. Extensive experimental results demonstrate that PSA reduces KV cache usage for attention computation by up to 2.4× and 8.8×, and increases end-to-end serving throughput by up to 1.4× and 2.0×, compared to state-of-the-art DSAes and systems without sparse attention, respectively.
Submitted 1 March, 2025; originally announced March 2025.
Comments: 12 pages, 6 figures
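One plausible reading of the adaptive-budget idea is: instead of a fixed top-k, keep the smallest set of KV entries whose attention mass reaches a target coverage. The PyTorch snippet below sketches only that selection rule under this assumption; the function name and threshold are invented, and the paper's pipelining and memory management are omitted entirely.

```python
# Sketch of an adaptive KV budget: keep the minimal prefix of keys (sorted by
# attention weight) whose cumulative softmax mass reaches `coverage`.
import torch

def adaptive_kv_select(attn_weights, coverage=0.95):
    """attn_weights: (num_keys,) softmax weights for one query.
    Returns indices of the minimal set of keys covering `coverage` mass."""
    w, idx = torch.sort(attn_weights, descending=True)
    cum = torch.cumsum(w, dim=0)
    k = int(torch.searchsorted(cum, torch.tensor(coverage)).item()) + 1
    return idx[:k]

torch.manual_seed(0)
scores = torch.randn(1024) * 3                 # toy attention logits
weights = torch.softmax(scores, dim=0)
kept = adaptive_kv_select(weights, coverage=0.95)
print(len(kept), "of", len(weights), "KV entries cover 95% of attention mass")
```

With peaked attention distributions this keeps far fewer entries than a fixed k sized for the worst case, which matches the accuracy/efficiency trade-off framing in the abstract.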
arXiv:2502.19750 (https://arxiv.org/abs/2502.19750) [pdf, other]
Subjects: cs.LG (Machine Learning); cs.CV (Computer Vision and Pattern Recognition)
CirT: Global Subseasonal-to-Seasonal Forecasting with Geometry-inspired Transformer
Authors: Yang Liu, Zinan Zheng, Jiashun Cheng, Fugee Tsung, Deli Zhao, Yu Rong, Jia Li
Abstract: Accurate Subseasonal-to-Seasonal (S2S) climate forecasting is pivotal for decision-making, including agriculture planning and disaster preparedness, but is known to be challenging due to its chaotic nature. Although recent data-driven models have shown promising results, their performance is limited by inadequate consideration of geometric inductive biases. Usually, they treat the spherical weather data as planar images, resulting in an inaccurate representation of locations and spatial relations. In this work, we propose the geometry-inspired Circular Transformer (CirT) to model the cyclic characteristics of the graticule, consisting of two key designs: (1) decomposing the weather data by latitude into circular patches that serve as input tokens to the Transformer; (2) leveraging the Fourier transform in self-attention to capture global information and model the spatial periodicity. Extensive experiments on the Earth Reanalysis 5 (ERA5) dataset demonstrate that our model yields a significant improvement over advanced data-driven models, including PanguWeather and GraphCast, as well as skillful ECMWF systems. Additionally, we empirically show the effectiveness of our model designs and high-quality prediction over spatial and temporal dimensions.
Submitted 26 February, 2025; originally announced February 2025.
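One way to picture design (2) is FNet-style frequency mixing along each periodic latitude ring, which naturally respects the circular geometry. The snippet below is a stand-in under that assumption, not the paper's actual attention design; shapes are arbitrary.

```python
# FNet-style Fourier mixing over circular latitude patches: tokens along the
# longitude axis are mixed in frequency space, where periodicity is built in.
import torch

B, lat, lon, d = 2, 32, 64, 16          # latitude rings as circular patches
x = torch.randn(B, lat, lon, d)

f = torch.fft.fft(x, dim=2)             # FFT along the periodic longitude axis
f = torch.fft.fft(f, dim=3)             # and along the channel axis (FNet-style)
mixed = f.real                          # keep the real part as the mixed tokens
print(mixed.shape)                      # (2, 32, 64, 16), globally mixed per ring
```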
arXiv:2502.17933 (https://arxiv.org/abs/2502.17933) [pdf, other]
Subjects: eess.IV (Image and Video Processing); cs.CV (Computer Vision and Pattern Recognition)
3D Anatomical Structure-guided Deep Learning for Accurate Diffusion Microstructure Imaging
Authors: Xinrui Ma, Jian Cheng, Wenxin Fan, Ruoyou Wu, Yongquan Ye, Shanshan Wang
Abstract: Diffusion magnetic resonance imaging (dMRI) is a crucial non-invasive technique for exploring the microstructure of the living human brain. Traditional hand-crafted and model-based tissue microstructure reconstruction methods often require extensive diffusion gradient sampling, which can be time-consuming and limits the clinical applicability of tissue microstructure information. Recent advances in deep learning have shown promise in microstructure estimation; however, accurately estimating tissue microstructure from clinically feasible dMRI scans remains challenging without appropriate constraints. This paper introduces a novel framework that achieves high-fidelity and rapid diffusion microstructure imaging by simultaneously leveraging anatomical information from macro-level priors and mutual information across parameters. This approach enhances time efficiency while maintaining accuracy in microstructure estimation. Experimental results demonstrate that our method outperforms four state-of-the-art techniques, achieving a peak signal-to-noise ratio (PSNR) of 30.51±0.58 and a structural similarity index measure (SSIM) of 0.97±0.004 in estimating parametric maps of multiple diffusion models.
Notably, our method achieves a 15× acceleration compared to the dense sampling approach, which typically utilizes 270 diffusion gradients.
Submitted 25 February, 2025; originally announced February 2025.

arXiv:2502.17263 (https://arxiv.org/abs/2502.17263) [pdf, other]
Subjects: cs.HC (Human-Computer Interaction); cs.SE (Software Engineering)
DOI: https://doi.org/10.1145/3706599.3720063
Untold Stories: Unveiling the Scarce Contributions of UX Professionals to Usability Issue Discussions of Open Source Software Projects
Authors: Arghavan Sanei, Jinghui Cheng
Abstract: Previous work established that open source software (OSS) projects can benefit from the involvement of UX professionals, who offer user-centric perspectives and contributions to improve software usability. However, their participation in OSS issue discussions (places where design and implementation decisions are often made) is relatively scarce since those platforms are created with a developer-centric mindset. Analyzing a dataset sampled from five OSS projects, this study identifies UX professionals' distinct approaches to raising and following up on usability issues.
Compared to other contributors, UX professionals addressed a broader range of usability issues, supported their stances well, and were more factual than emotional. They also actively engaged in discussions, providing additional insights and clarifications in comments following up on the issues they posted. Results from this study provide useful insights for increasing UX professionals' involvement in OSS communities to improve usability and end-user satisfaction.
Submitted 3 March, 2025; v1 submitted 24 February, 2025; originally announced February 2025.
Comments: 6 pages, 4 figures, CHI 2025 LBW

arXiv:2502.16971 (https://arxiv.org/abs/2502.16971) [pdf, other]
Subjects: cs.CL (Computation and Language); cs.AI (Artificial Intelligence)
LongSafety: Evaluating Long-Context Safety of Large Language Models
Authors: Yida Lu, Jiale Cheng, Zhexin Zhang, Shiyao Cui, Cunxiang Wang, Xiaotao Gu, Yuxiao Dong, Jie Tang, Hongning Wang, Minlie Huang
Abstract: As Large Language Models (LLMs) continue to advance in understanding and generating long sequences, new safety concerns have been introduced through the long context. However, the safety of LLMs in long-context tasks remains under-explored, leaving a significant gap in both the evaluation and improvement of their safety.
To address this, we introduce LongSafety, the first comprehensive benchmark specifically designed to evaluate LLM safety in open-ended long-context tasks. LongSafety encompasses 7 categories of safety issues and 6 user-oriented long-context tasks, with a total of 1,543 test cases, averaging 5,424 words per context. Our evaluation of 16 representative LLMs reveals significant safety vulnerabilities, with most models achieving safety rates below 55%. Our findings also indicate that strong safety performance in short-context scenarios does not necessarily correlate with safety in long-context tasks, emphasizing the unique challenges and urgency of improving long-context safety. Moreover, through extensive analysis, we identify challenging safety issues and task types for long-context models. Furthermore, we find that relevant context and extended input sequences can exacerbate safety risks in long-context scenarios, highlighting the critical need for ongoing attention to long-context safety challenges. Our code and data are available at https://github.com/thu-coai/LongSafety.
Submitted 24 February, 2025; originally announced February 2025.
arXiv:2502.16907 (https://arxiv.org/abs/2502.16907) [pdf, other]
Subjects: cs.CV (Computer Vision and Pattern Recognition); cs.AI (Artificial Intelligence)
MambaFlow: A Novel and Flow-guided State Space Model for Scene Flow Estimation
Authors: Jiehao Luo, Jintao Cheng, Xiaoyu Tang, Qingwen Zhang, Bohuan Xue, Rui Fan
Abstract: Scene flow estimation aims to predict 3D motion from consecutive point cloud frames, which is of great interest in the autonomous driving field. Existing methods face challenges such as insufficient spatio-temporal modeling and inherent loss of fine-grained features during voxelization. However, the success of Mamba, a representative state space model (SSM) that enables global modeling with linear complexity, provides a promising solution. In this paper, we propose MambaFlow, a novel scene flow estimation network with a Mamba-based decoder. It enables deep interaction and coupling of spatio-temporal features using a well-designed backbone. Innovatively, we steer the global attention modeling of voxel-based features with point offset information using an efficient Mamba-based decoder, learning voxel-to-point patterns that are used to devoxelize shared voxel representations into point-wise features. To further enhance the model's generalization capabilities across diverse scenarios, we propose a novel scene-adaptive loss function that automatically adapts to different motion patterns. Extensive experiments on the Argoverse 2 benchmark demonstrate that MambaFlow achieves state-of-the-art performance with real-time inference speed among existing works, enabling accurate flow estimation in real-world urban scenarios. The code is available at https://github.com/SCNU-RISLAB/MambaFlow.
Submitted 24 February, 2025; originally announced February 2025.
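The devoxelization step mentioned above (shared voxel features turned into point-wise features using per-point offsets) can be sketched generically. Everything below, including the grid layout and the MLP, is an illustrative assumption; the paper instead learns these voxel-to-point patterns with a Mamba-based decoder.

```python
# Toy voxel-to-point devoxelization: each point gathers its voxel's shared
# feature and combines it with the point's offset from the voxel center.
import torch
import torch.nn as nn

N, V, d = 1000, 64, 32
points = torch.rand(N, 3) * 4.0                  # points in a 4 m cube
voxel_size = 1.0
coords = (points / voxel_size).floor().long()    # 4x4x4 grid -> 64 voxels
vox_id = coords[:, 0] * 16 + coords[:, 1] * 4 + coords[:, 2]

voxel_feats = torch.randn(V, d)                  # shared voxel representations
centers = (coords.float() + 0.5) * voxel_size
offsets = points - centers                       # point offset inside its voxel

mlp = nn.Sequential(nn.Linear(d + 3, d), nn.ReLU(), nn.Linear(d, d))
point_feats = mlp(torch.cat([voxel_feats[vox_id], offsets], dim=1))
print(point_feats.shape)                         # (1000, 32) point-wise features
```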
arXiv:2502.14429 (https://arxiv.org/abs/2502.14429) [pdf, other]
Subjects: cs.CL (Computation and Language)
Early-Exit and Instant Confidence Translation Quality Estimation
Authors: Vilém Zouhar, Maike Züfle, Beni Egressy, Julius Cheng, Jan Niehues
Abstract: Quality estimation is omnipresent in machine translation, for both evaluation and generation. Unfortunately, quality estimation models are often opaque and computationally expensive, making them impractical to be part of large-scale pipelines. In this work, we tackle two connected challenges: (1) reducing the cost of quality estimation at scale, and (2) developing an inexpensive uncertainty estimation method for quality estimation. To address the latter, we introduce Instant Confidence COMET, an uncertainty-aware quality estimation model that matches the performance of previous approaches at a fraction of their costs. We extend this to Early-Exit COMET, a quality estimation model that can compute quality scores and associated confidences already at early model layers, allowing us to early-exit computations and reduce evaluation costs. We also apply our model to machine translation reranking.
We combine Early-Exit COMET with an upper confidence bound bandit algorithm to find the best candidate from a large pool without having to run the full evaluation model on all candidates. In both cases (evaluation and reranking) our methods reduce the required compute by 50% with very little degradation in performance.
Submitted 20 February, 2025; originally announced February 2025.
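The reranking setup pairs naturally with a confidence-bound elimination loop: score every candidate with the cheap early exit, then spend deeper (tighter) evaluations only on candidates whose upper bound still rivals the leader. The simulation below is a schematic sketch with invented per-layer noise levels, not the paper's algorithm or data.

```python
# UCB-style candidate elimination with early-exit scores: each "layer" gives
# a noisier (early) or tighter (late) estimate of a candidate's quality.
import numpy as np

rng = np.random.default_rng(1)
n, layers = 20, 6
true_q = rng.uniform(0.5, 0.9, size=n)                  # latent quality per candidate
sigma = np.array([0.20, 0.12, 0.08, 0.05, 0.03, 0.0])   # assumed width per exit layer

depth = np.zeros(n, dtype=int)                          # deepest layer scored so far
est = true_q + rng.normal(0.0, sigma[0], size=n)        # layer-0 instant scores
extra = 0                                               # deeper passes spent

while True:
    ucb, lcb = est + sigma[depth], est - sigma[depth]
    best = int(np.argmax(lcb))
    contenders = np.flatnonzero(ucb >= lcb[best])       # still rival the leader
    deepenable = contenders[depth[contenders] < layers - 1]
    if len(contenders) == 1 or len(deepenable) == 0:
        break                                           # winner separated or exact
    i = int(deepenable[np.argmax(ucb[deepenable])])     # refine top upper bound
    depth[i] += 1
    est[i] = true_q[i] + rng.normal(0.0, sigma[depth[i]])
    extra += 1

print(f"picked {best}, true best {int(np.argmax(true_q))}, "
      f"deeper passes used: {extra} (exhaustive would be {n * (layers - 1)})")
```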
arXiv:2502.13962 (https://arxiv.org/abs/2502.13962) [pdf, other]
Subjects: cs.CL (Computation and Language)
Is That Your Final Answer? Test-Time Scaling Improves Selective Question Answering
Authors: William Jurayj, Jeffrey Cheng, Benjamin Van Durme
Abstract: Scaling the test-time compute of large language models has demonstrated impressive performance on reasoning benchmarks. However, existing evaluations of test-time scaling make the strong assumption that a reasoning system should always give an answer to any question provided. This overlooks concerns about whether a model is confident in its answer, and whether it is appropriate to always provide a response. To address these concerns, we extract confidence scores during reasoning for thresholding model responses. We find that increasing compute budget at inference time not only helps models answer more questions correctly, but also increases confidence in correct responses. We then extend the current paradigm of zero-risk responses during evaluation by considering settings with non-zero levels of response risk, and suggest a recipe for reporting evaluations under these settings.
Submitted 19 February, 2025; originally announced February 2025.
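A minimal version of confidence-thresholded (selective) answering with a risk-adjusted score can be written directly. The penalty form below (wrong answers cost `risk`, abstentions score zero) is an assumed, natural recipe for illustration, not necessarily the one the paper proposes.

```python
# Selective QA scoring under response risk: answer only above a confidence
# threshold; wrong answers are penalized, abstentions are neutral.
def risk_adjusted_score(preds, risk=1.0, threshold=0.7):
    """preds: list of (confidence, is_correct) pairs, one per question."""
    score = 0.0
    for conf, correct in preds:
        if conf < threshold:
            continue                 # abstain: no reward, no penalty
        score += 1.0 if correct else -risk
    return score / len(preds)

preds = [(0.95, True), (0.60, False), (0.80, False), (0.90, True), (0.55, True)]
for thr in (0.0, 0.7, 0.9):
    s = risk_adjusted_score(preds, risk=1.0, threshold=thr)
    print(f"threshold={thr}: score={s:+.2f}")
```

Raising the threshold trades answered questions for fewer penalized mistakes, which is exactly the regime where better-calibrated confidence (e.g., from more test-time compute) pays off.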
arXiv:2502.12674 (https://arxiv.org/abs/2502.12674) [pdf, other]
Subjects: cs.RO (Robotics); cs.LG (Machine Learning)
SATA: Safe and Adaptive Torque-Based Locomotion Policies Inspired by Animal Learning
Authors: Peizhuo Li, Hongyi Li, Ge Sun, Jin Cheng, Xinrong Yang, Guillaume Bellegarda, Milad Shafiee, Yuhong Cao, Auke Ijspeert, Guillaume Sartoretti
Abstract: Despite recent advances in learning-based controllers for legged robots, deployments in human-centric environments remain limited by safety concerns. Most of these approaches use position-based control, where policies output target joint angles that must be processed by a low-level controller (e.g., PD or impedance controllers) to compute joint torques. Although impressive results have been achieved in controlled real-world scenarios, these methods often struggle with compliance and adaptability when encountering environments or disturbances unseen during training, potentially resulting in extreme or unsafe behaviors. Inspired by how animals achieve smooth and adaptive movements by controlling muscle extension and contraction, torque-based policies offer a promising alternative by enabling precise and direct control of the actuators in torque space. In principle, this approach facilitates more effective interactions with the environment, resulting in safer and more adaptable behaviors. However, challenges such as a highly nonlinear state space and inefficient exploration during training have hindered their broader adoption. To address these limitations, we propose SATA, a bio-inspired framework that mimics key biomechanical principles and adaptive learning mechanisms observed in animal locomotion. Our approach effectively addresses the inherent challenges of learning torque-based policies by significantly improving early-stage exploration, leading to high-performance final policies. Remarkably, our method achieves zero-shot sim-to-real transfer. Our experimental results indicate that SATA demonstrates remarkable compliance and safety, even in challenging environments such as soft/slippery terrain or narrow passages, and under significant external disturbances, highlighting its potential for practical deployments in human-centric and safety-critical scenarios.
Submitted 18 February, 2025; originally announced February 2025.

arXiv:2502.12413 (https://arxiv.org/abs/2502.12413) [pdf, other]
Subjects: cs.LG (Machine Learning)
DivIL: Unveiling and Addressing Over-Invariance for Out-of-Distribution Generalization
Authors: Jiaqi Wang, Yuhang Zhou, Zhixiong Zhang, Qiguang Chen, Yongqiang Chen, James Cheng
Abstract: Out-of-distribution generalization is a common problem that expects the model to perform well on distributions that differ from, and may be far from, the training data.
A popular approach to addressing this issue is invariant learning (IL), in which the model is compelled to focus on invariant features instead of spurious features by adding strong constraints during training. However, there are some potential pitfalls of strong invariant constraints. Due to the limited number of diverse environments and over-regularization in the feature space, it may lead to a loss of important details in the invariant features while alleviating the spurious correlations, namely the over-invariance, which can also degrade generalization performance. We theoretically define the over-invariance and observe that this issue occurs in various classic IL methods. To alleviate this issue, we propose a simple approach, Diverse Invariant Learning (DivIL), which adds unsupervised contrastive learning and a random masking mechanism to compensate for the invariant constraints, and which can be applied to various IL methods. Furthermore, we conduct experiments on 12 datasets and 6 classic models across multiple modalities, verifying our over-invariance insight and the effectiveness of our DivIL framework. Our code is available at https://github.com/kokolerk/DivIL.
Submitted 17 February, 2025; originally announced February 2025.
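Schematically, the recipe reads as "IL objective plus a diversity term." The sketch below combines an IRMv1-style penalty (one classic IL constraint) with an InfoNCE loss over two randomly masked views; the loss weights, mask rate, and module stubs are my assumptions, not DivIL's actual design.

```python
# Schematic loss: ERM + invariance penalty + contrastive term on masked views.
import torch
import torch.nn.functional as F

def irm_penalty(logits, y):
    # Squared gradient of the risk w.r.t. a dummy scale: the IRMv1 penalty.
    scale = torch.ones(1, requires_grad=True)
    loss = F.cross_entropy(logits * scale, y)
    (g,) = torch.autograd.grad(loss, scale, create_graph=True)
    return (g ** 2).sum()

def contrastive(z1, z2, tau=0.2):
    # InfoNCE between two views of the same batch (positives on the diagonal).
    z1, z2 = F.normalize(z1, dim=1), F.normalize(z2, dim=1)
    logits = z1 @ z2.t() / tau
    return F.cross_entropy(logits, torch.arange(len(z1)))

torch.manual_seed(0)
feats = torch.randn(32, 64)                           # encoder output (stub)
y = torch.randint(0, 2, (32,))
head = torch.nn.Linear(64, 2)

mask1 = (torch.rand_like(feats) > 0.2).float()        # random feature masking
mask2 = (torch.rand_like(feats) > 0.2).float()
loss = (F.cross_entropy(head(feats), y)
        + 1.0 * irm_penalty(head(feats), y)                 # invariance constraint
        + 0.5 * contrastive(feats * mask1, feats * mask2))  # diversity term
print(float(loss))
```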
arXiv:2502.11612 (https://arxiv.org/abs/2502.11612) [pdf, other]
Subjects: cs.LG (Machine Learning); cs.AI (Artificial Intelligence)
Maximum Entropy Reinforcement Learning with Diffusion Policy
Authors: Xiaoyi Dong, Jian Cheng, Xi Sheryl Zhang
Abstract: The Soft Actor-Critic (SAC) algorithm with a Gaussian policy has become a mainstream implementation for realizing the Maximum Entropy Reinforcement Learning (MaxEnt RL) objective, which incorporates entropy maximization to encourage exploration and enhance policy robustness. While the Gaussian policy performs well on simpler tasks, its exploration capacity and potential performance in complex multi-goal RL environments are limited by its inherent unimodality. In this paper, we employ the diffusion model, a powerful generative model capable of capturing complex multimodal distributions, as the policy representation to fulfill the MaxEnt RL objective, developing a method named MaxEnt RL with Diffusion Policy (MaxEntDP). Our method enables efficient exploration and brings the policy closer to the optimal MaxEnt policy. Experimental results on Mujoco benchmarks show that MaxEntDP outperforms the Gaussian policy and other generative models within the MaxEnt RL framework, and performs comparably to other state-of-the-art diffusion-based online RL algorithms. Our code is available at https://github.com/diffusionyes/MaxEntDP.
Submitted 18 February, 2025; v1 submitted 17 February, 2025; originally announced February 2025.
Comments: 21 pages, 7 figures

arXiv:2502.11394 (https://arxiv.org/abs/2502.11394) [pdf, other]
Subjects: cs.LG (Machine Learning)
Oversmoothing as Loss of Sign: Towards Structural Balance in Graph Neural Networks
Authors: Jiaqi Wang, Xinyi Wu, James Cheng, Yifei Wang
Abstract: Oversmoothing is a common issue in graph neural networks (GNNs), where node representations become excessively homogeneous as the number of layers increases, resulting in degraded performance. Various strategies have been proposed to combat oversmoothing in practice, yet they are based on different heuristics and lack a unified understanding of their inherent mechanisms. In this paper, we show that three major classes of anti-oversmoothing techniques can be mathematically interpreted as message passing over signed graphs comprising both positive and negative edges. By analyzing the asymptotic behavior of signed graph propagation, we demonstrate that negative edges can repel nodes to a certain extent, providing deeper insights into how these methods mitigate oversmoothing.
Furthermore, our results suggest that the structural balance of a signed graph (positive edges exist only within clusters and negative edges appear only between clusters) is crucial for clustering node representations in the long term through signed graph propagation. Motivated by these observations, we propose a solution with theoretical guarantees, Structural Balance Propagation (SBP), which mitigates oversmoothing by incorporating label and feature information to create a structurally balanced graph for message passing. Experiments on nine datasets against twelve baselines demonstrate the effectiveness of our method, highlighting the value of our signed graph perspective.
Submitted 16 February, 2025; originally announced February 2025.
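The structural-balance claim can be checked on a toy graph: with positive edges inside clusters and negative edges between them, repeated propagation keeps the two clusters separated, while all-positive propagation collapses everything to a common value. The dense graph and sign pattern below are toy assumptions for the demonstration, not the SBP method itself.

```python
# Signed vs. all-positive propagation on a balanced two-cluster toy graph.
import numpy as np

n = 6
cluster = np.array([0, 0, 0, 1, 1, 1])
A = np.ones((n, n)) - np.eye(n)                       # dense toy graph
signs = np.where(cluster[:, None] == cluster[None, :], 1.0, -1.0)

P_pos = A / A.sum(1, keepdims=True)                   # all-positive operator
P_sgn = (A * signs) / A.sum(1, keepdims=True)         # structurally balanced signs

rng = np.random.default_rng(0)
Xp = Xs = rng.normal(size=(n, 2))                     # initial node features
for _ in range(30):
    Xp = P_pos @ Xp                                   # oversmooths to a constant
    Xs = P_sgn @ Xs                                   # keeps clusters apart

def gap(X):  # distance between the two cluster means
    return np.linalg.norm(X[cluster == 0].mean(0) - X[cluster == 1].mean(0))

print(f"cluster-mean gap  positive-only: {gap(Xp):.1e}  signed: {gap(Xs):.1e}")
```

After 30 steps the positive-only gap is numerically zero while the signed gap stays order-one, matching the repulsion behavior the abstract describes.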
We provide a detailed estimation of space, time, and energy resources for fault-tolerant superco&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11239v2-abstract-full').style.display = 'inline'; document.getElementById('2502.11239v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11239v2-abstract-full" style="display: none;"> Quantum computing, a prominent non-Von Neumann paradigm beyond Moore&#39;s law, can offer superpolynomial speedups for certain problems. Yet its advantages in efficiency for tasks like machine learning remain under investigation, and quantum noise complicates resource estimations and classical comparisons. We provide a detailed estimation of space, time, and energy resources for fault-tolerant superconducting devices running the Harrow-Hassidim-Lloyd (HHL) algorithm, a quantum linear system solver relevant to linear algebra and machine learning. Excluding memory and data transfer, possible quantum advantages over the classical conjugate gradient method could emerge at $N \approx 2^{33} \sim 2^{48}$ or even lower, requiring ${O}(10^5)$ physical qubits, ${O}(10^{12}\sim10^{13})$ Joules, and ${O}(10^6)$ seconds under surface code fault-tolerance with three types of magic state distillation (15-1, 116-12, 225-1). Key parameters include condition number, sparsity, and precision $\kappa, s \approx {O}(10\sim100)$, $\epsilon \sim 0.01$, and physical error $10^{-5}$. Our resource estimator adjusts $N, \kappa, s, \epsilon$, providing a map of quantum-classical boundaries and revealing where a practical quantum advantage may arise. Our work quantitatively determines how advanced a fault-tolerant quantum computer should be to achieve possible, significant benefits on real-world problems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11239v2-abstract-full').style.display = 'none'; document.getElementById('2502.11239v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 16 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">28 pages, many figures.
v2: correcting typos</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11176">arXiv:2502.11176</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.11176">pdf</a>, <a href="https://arxiv.org/format/2502.11176">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> LogiDynamics: Unraveling the Dynamics of Logical Inference in Large Language Model Reasoning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+T">Tianshi Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+J">Jiayang Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+C">Chunyang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Shi%2C+H">Haochen Shi</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Z">Zihao Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Bai%2C+J">Jiaxin Bai</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+Y">Yangqiu Song</a>, <a href="/search/cs?searchtype=author&amp;query=Wong%2C+G+Y">Ginny Y. Wong</a>, <a href="/search/cs?searchtype=author&amp;query=See%2C+S">Simon See</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11176v1-abstract-short" style="display: inline;"> Modern large language models (LLMs) employ various forms of logical inference, both implicitly and explicitly, when addressing reasoning tasks. Understanding how to optimally leverage these inference paradigms is critical for advancing LLMs&#39; reasoning capabilities. This paper adopts an exploratory approach by introducing a controlled evaluation environment for analogical reasoning -- a fundamental&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11176v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11176v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11176v1-abstract-full" style="display: none;"> Modern large language models (LLMs) employ various forms of logical inference, both implicitly and explicitly, when addressing reasoning tasks. Understanding how to optimally leverage these inference paradigms is critical for advancing LLMs&#39; reasoning capabilities. This paper adopts an exploratory approach by introducing a controlled evaluation environment for analogical reasoning -- a fundamental cognitive task -- that is systematically parameterized across three dimensions: modality (textual, visual, symbolic), difficulty (easy, medium, hard), and task format (multiple-choice or free-text generation). We analyze the comparative dynamics of inductive, abductive, and deductive inference pipelines across these dimensions, and demonstrate that our findings generalize to broader in-context learning tasks. Additionally, we investigate advanced paradigms such as hypothesis selection, verification, and refinement, revealing their potential to scale up logical inference in LLM reasoning. This exploratory study provides a foundation for future research in enhancing LLM reasoning through systematic logical inference strategies. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11176v1-abstract-full').style.display = 'none'; document.getElementById('2502.11176v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">21 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.10448">arXiv:2502.10448</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.10448">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Supply Chain Network Security Investment Strategies Based on Nonlinear Budget Constraints: The Moderating Roles of Market Share and Attack Risk </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+J">Jiajie Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+J">Jiaxin Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+C">Caijiao Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+L">Luxiang Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Fan%2C+Y">Yusheng Fan</a>, <a href="/search/cs?searchtype=author&amp;query=Bao%2C+Y">Yujie Bao</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+W">Wen Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.10448v1-abstract-short" style="display: inline;"> In the context of the rapid development of digital supply chain networks, dealing with the increasing cybersecurity threats and formulating effective security investment strategies to defend against cyberattack risks are the core issues in supply chain management. Cybersecurity investment decision-making is a key strategic task in enterprise supply chain manage-ment. Traditional game theory models&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10448v1-abstract-full').style.display = 'inline'; document.getElementById('2502.10448v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.10448v1-abstract-full" style="display: none;"> In the context of the rapid development of digital supply chain networks, dealing with the increasing cybersecurity threats and formulating effective security investment strategies to defend against cyberattack risks are the core issues in supply chain management. Cybersecurity investment decision-making is a key strategic task in enterprise supply chain manage-ment. Traditional game theory models and linear programming methods make it challenging to deal with complex problems such as multi-party par-ticipation in the supply chain, resource constraints, and risk uncertainty, re-sulting in enterprises facing high risks and uncertainties in the field of cy-bersecurity. 
To effectively meet this challenge, this study proposes a nonlinear budget-constrained cybersecurity investment optimization model based on variational inequality and a projection shrinkage algorithm. This method simulates the impact of market competition on security investment by introducing market share variables, combining variational inequality and a projection shrinkage algorithm to solve the model, and analyzing the effect of different variables such as budget constraints, cyberattack losses, and market share on supply chain network security. In numerical analysis, the model achieved high cybersecurity levels of 0.96 and 0.95 in the experimental scenarios of two retailers and two demand markets, respectively, and the budget constraint analysis revealed the profound impact of budget constraints on cybersecurity investment. Through numerical experiments and comparative analysis, the effectiveness and operability of this method in improving supply chain network security are verified. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10448v1-abstract-full').style.display = 'none'; document.getElementById('2502.10448v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Under Consideration at Operations Management Research</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.10038">arXiv:2502.10038</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.10038">pdf</a>, <a href="https://arxiv.org/format/2502.10038">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> POI-Enhancer: An LLM-based Semantic Enhancement Framework for POI Representation Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+J">Jiawei Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+J">Jingyuan Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yichuan Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Ji%2C+J">Jiahao Ji</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+Y">Yuanshao Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Zhibo Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+X">Xiangyu Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.10038v2-abstract-short" style="display: inline;"> POI representation learning plays a crucial role in handling tasks related to user mobility data. Recent studies have shown that enriching POI representations with multimodal information can significantly enhance their task performance.
Previously, the textual information incorporated into POI representations typically involved only POI categories or check-in content, leading to relatively weak te&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10038v2-abstract-full').style.display = 'inline'; document.getElementById('2502.10038v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.10038v2-abstract-full" style="display: none;"> POI representation learning plays a crucial role in handling tasks related to user mobility data. Recent studies have shown that enriching POI representations with multimodal information can significantly enhance their task performance. Previously, the textual information incorporated into POI representations typically involved only POI categories or check-in content, leading to relatively weak textual features in existing methods. In contrast, large language models (LLMs) trained on extensive text data have been found to possess rich textual knowledge. However, leveraging such knowledge to enhance POI representation learning presents two key challenges: first, how to extract POI-related knowledge from LLMs effectively, and second, how to integrate the extracted information to enhance POI representations. To address these challenges, we propose POI-Enhancer, a portable framework that leverages LLMs to improve POI representations produced by classic POI learning models. We first design three specialized prompts to extract semantic information from LLMs efficiently. Then, the Dual Feature Alignment module enhances the quality of the extracted information, while the Semantic Feature Fusion module preserves its integrity. The Cross Attention Fusion module then adaptively integrates this high-quality information into POI representations, and Multi-View Contrastive Learning further injects human-understandable semantic information into these representations. Extensive experiments on three real-world datasets demonstrate the effectiveness of our framework, showing significant improvements across all baseline representations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10038v2-abstract-full').style.display = 'none'; document.getElementById('2502.10038v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">AAAI 25</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.01688">arXiv:2502.01688</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.01688">pdf</a>, <a href="https://arxiv.org/format/2502.01688">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neurons and Cognition">q-bio.NC</span> </div> </div> <p class="title is-5 mathjax"> BrainOOD: Out-of-distribution Generalizable Brain Network Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xu%2C+J">Jiaxing Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Y">Yongqiang Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Dong%2C+X">Xia Dong</a>, <a href="/search/cs?searchtype=author&amp;query=Lan%2C+M">Mengcheng Lan</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+T">Tiancheng Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Bian%2C+Q">Qingtian Bian</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+J">James Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Ke%2C+Y">Yiping Ke</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.01688v1-abstract-short" style="display: inline;"> In neuroscience, identifying distinct patterns linked to neurological disorders, such as Alzheimer&#39;s and Autism, is critical for early diagnosis and effective intervention. Graph Neural Networks (GNNs) have shown promising in analyzing brain networks, but there are two major challenges in using GNNs: (1) distribution shifts in multi-site brain network data, leading to poor Out-of-Distribution (OOD&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01688v1-abstract-full').style.display = 'inline'; document.getElementById('2502.01688v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.01688v1-abstract-full" style="display: none;"> In neuroscience, identifying distinct patterns linked to neurological disorders, such as Alzheimer&#39;s and Autism, is critical for early diagnosis and effective intervention. Graph Neural Networks (GNNs) have shown promising in analyzing brain networks, but there are two major challenges in using GNNs: (1) distribution shifts in multi-site brain network data, leading to poor Out-of-Distribution (OOD) generalization, and (2) limited interpretability in identifying key brain regions critical to neurological disorders. Existing graph OOD methods, while effective in other domains, struggle with the unique characteristics of brain networks. To bridge these gaps, we introduce BrainOOD, a novel framework tailored for brain networks that enhances GNNs&#39; OOD generalization and interpretability. BrainOOD framework consists of a feature selector and a structure extractor, which incorporates various auxiliary losses including an improved Graph Information Bottleneck (GIB) objective to recover causal subgraphs. 
By aligning structure selection across brain networks and filtering noisy features, BrainOOD offers reliable interpretations of critical brain regions. Our approach outperforms 16 existing methods and improves generalization to OOD subjects by up to 8.5%. Case studies highlight the scientific validity of the extracted patterns, which align with findings in the established neuroscience literature. We also propose the first OOD brain network benchmark, which provides a foundation for future research in this field. Our code is available at https://github.com/AngusMonroe/BrainOOD. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01688v1-abstract-full').style.display = 'none'; document.getElementById('2502.01688v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.01522">arXiv:2502.01522</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.01522">pdf</a>, <a href="https://arxiv.org/format/2502.01522">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> BD-Diff: Generative Diffusion Model for Image Deblurring on Unknown Domains with Blur-Decoupled Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+J">Junhao Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+W">Wei-Ting Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+X">Xi Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+M">Ming-Hsuan Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.01522v1-abstract-short" style="display: inline;"> Generative diffusion models trained on large-scale datasets have achieved remarkable progress in image synthesis. Owing to their ability to supplement missing details and generate aesthetically pleasing content, recent works have applied them to image deblurring tasks by training an adapter on blurry-sharp image pairs to provide structural conditions for restoration. However, acquiring substa&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01522v1-abstract-full').style.display = 'inline'; document.getElementById('2502.01522v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.01522v1-abstract-full" style="display: none;"> Generative diffusion models trained on large-scale datasets have achieved remarkable progress in image synthesis. Owing to their ability to supplement missing details and generate aesthetically pleasing content, recent works have applied them to image deblurring tasks by training an adapter on blurry-sharp image pairs to provide structural conditions for restoration. However, acquiring substantial amounts of realistic paired data is challenging and costly in real-world scenarios.
On the other hand, relying solely on synthetic data often results in overfitting, leading to unsatisfactory performance when confronted with unseen blur patterns. To tackle this issue, we propose BD-Diff, a generative-diffusion-based model designed to enhance deblurring performance on unknown domains by decoupling structural features and blur patterns through joint training on three specially designed tasks. We employ two Q-Formers as separate extractors of structural representations and blur patterns. The extracted features are used simultaneously for a supervised deblurring task on synthetic data and an unsupervised blur-transfer task that leverages unpaired blurred images from the target domain. Furthermore, we introduce a reconstruction task to make the structural features and blur patterns complementary. This blur-decoupled learning process enhances the generalization capabilities of BD-Diff when encountering unknown domain blur patterns. Experiments on real-world datasets demonstrate that BD-Diff outperforms existing state-of-the-art methods in blur removal and structural preservation in various challenging scenarios. The code will be released at https://github.com/donahowe/BD-Diff <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01522v1-abstract-full').style.display = 'none'; document.getElementById('2502.01522v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">We propose BD-Diff to integrate a generative diffusion model into unpaired deblurring tasks</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.01277">arXiv:2502.01277</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.01277">pdf</a>, <a href="https://arxiv.org/format/2502.01277">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> OCTOPINF: Workload-Aware Inference Serving for Edge Video Analytics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Nguyen%2C+T">Thanh-Tung Nguyen</a>, <a href="/search/cs?searchtype=author&amp;query=Liebe%2C+L">Lucas Liebe</a>, <a href="/search/cs?searchtype=author&amp;query=Tau%2C+N">Nhat-Quang Tau</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Y">Yuheng Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+J">Jinghan Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+D">Dongman Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.01277v1-abstract-short" style="display: inline;"> Edge Video Analytics (EVA) has gained significant attention as a major application of pervasive computing, enabling real-time visual processing.
EVA pipelines, composed of deep neural networks (DNNs), typically demand efficient inference serving under stringent latency requirements, which is challenging due to dynamic Edge environments (e.g., workload variability and network instability). More&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01277v1-abstract-full').style.display = 'inline'; document.getElementById('2502.01277v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.01277v1-abstract-full" style="display: none;"> Edge Video Analytics (EVA) has gained significant attention as a major application of pervasive computing, enabling real-time visual processing. EVA pipelines, composed of deep neural networks (DNNs), typically demand efficient inference serving under stringent latency requirements, which is challenging due to dynamic Edge environments (e.g., workload variability and network instability). Moreover, EVA pipelines also face significant resource contention caused by resource (e.g., GPU) constraints at the Edge. In this paper, we introduce OCTOPINF, a novel resource-efficient and workload-aware inference serving system designed for real-time EVA. OCTOPINF tackles the unique challenges of dynamic edge environments through fine-grained resource allocation, adaptive batching, and workload balancing between edge devices and servers. Furthermore, we propose a spatiotemporal scheduling algorithm that optimizes the co-location of inference tasks on GPUs, improving performance and ensuring compliance with service-level objectives (SLOs). Extensive evaluations on a real-world testbed demonstrate the effectiveness of our approach. It achieves an effective throughput increase of up to 10x compared to the baselines and shows better robustness in challenging scenarios. OCTOPINF can be used for any DNN-based EVA inference task with minimal adaptation and is available at https://github.com/tungngreen/PipelineScheduler. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01277v1-abstract-full').style.display = 'none'; document.getElementById('2502.01277v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025.
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.01083">arXiv:2502.01083</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.01083">pdf</a>, <a href="https://arxiv.org/format/2502.01083">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Tool Unlearning for Tool-Augmented LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+J">Jiali Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Amiri%2C+H">Hadi Amiri</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.01083v1-abstract-short" style="display: inline;"> Tool-augmented large language models (LLMs) are often trained on datasets of query-response pairs, which embed the ability to use tools or APIs directly into the parametric knowledge of LLMs. Tool-augmented LLMs need the ability to forget learned tools due to security vulnerabilities, privacy regulations, or tool deprecations. However, ``tool unlearning&#39;&#39; has not been investigated in the unlearning li&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01083v1-abstract-full').style.display = 'inline'; document.getElementById('2502.01083v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.01083v1-abstract-full" style="display: none;"> Tool-augmented large language models (LLMs) are often trained on datasets of query-response pairs, which embed the ability to use tools or APIs directly into the parametric knowledge of LLMs. Tool-augmented LLMs need the ability to forget learned tools due to security vulnerabilities, privacy regulations, or tool deprecations. However, ``tool unlearning&#39;&#39; has not been investigated in the unlearning literature. We introduce this novel task, which requires addressing distinct challenges compared to traditional unlearning: knowledge removal rather than forgetting individual samples, the high cost of optimizing LLMs, and the need for principled evaluation metrics. To bridge these gaps, we propose ToolDelete, the first approach for unlearning tools from tool-augmented LLMs. It implements three key properties to address the above challenges for effective tool unlearning and introduces a new membership inference attack (MIA) model for effective evaluation. Extensive experiments on multiple tool learning datasets and tool-augmented LLMs show that ToolDelete effectively unlearns randomly selected tools, while preserving the LLM&#39;s knowledge on non-deleted tools and maintaining performance on general tasks.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01083v1-abstract-full').style.display = 'none'; document.getElementById('2502.01083v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">https://clu-uml.github.io/MU-Bench-Project-Page/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00835">arXiv:2502.00835</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.00835">pdf</a>, <a href="https://arxiv.org/format/2502.00835">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> CAIMAN: Causal Action Influence Detection for Sample Efficient Loco-manipulation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yuan%2C+Y">Yuanchen Yuan</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+J">Jin Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Urp%C3%AD%2C+N+A">N煤ria Armengol Urp铆</a>, <a href="/search/cs?searchtype=author&amp;query=Coros%2C+S">Stelian Coros</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00835v1-abstract-short" style="display: inline;"> Enabling legged robots to perform non-prehensile loco-manipulation with large and heavy objects is crucial for enhancing their versatility. However, this is a challenging task, often requiring sophisticated planning strategies or extensive task-specific reward shaping, especially in unstructured scenarios with obstacles. In this work, we present CAIMAN, a novel framework for learning loco-manipula&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00835v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00835v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00835v1-abstract-full" style="display: none;"> Enabling legged robots to perform non-prehensile loco-manipulation with large and heavy objects is crucial for enhancing their versatility. However, this is a challenging task, often requiring sophisticated planning strategies or extensive task-specific reward shaping, especially in unstructured scenarios with obstacles. In this work, we present CAIMAN, a novel framework for learning loco-manipulation that relies solely on sparse task rewards. We leverage causal action influence to detect states where the robot is in control over other entities in the environment, and use this measure as an intrinsically motivated objective to enable sample-efficient learning. We employ a hierarchical control strategy, combining a low-level locomotion policy with a high-level policy that prioritizes task-relevant velocity commands. 
Through simulated and real-world experiments, including object manipulation with obstacles, we demonstrate the framework&#39;s superior sample efficiency, adaptability to diverse environments, and successful transfer to hardware without fine-tuning. The proposed approach paves the way for scalable, robust, and autonomous loco-manipulation in real-world applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00835v1-abstract-full').style.display = 'none'; document.getElementById('2502.00835v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00694">arXiv:2502.00694</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.00694">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> Leveraging Large Language Models to Predict Antibody Biological Activity Against Influenza A Hemagglutinin </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Barkan%2C+E">Ella Barkan</a>, <a href="/search/cs?searchtype=author&amp;query=Siddiqui%2C+I">Ibrahim Siddiqui</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+K+J">Kevin J. Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Golts%2C+A">Alex Golts</a>, <a href="/search/cs?searchtype=author&amp;query=Shoshan%2C+Y">Yoel Shoshan</a>, <a href="/search/cs?searchtype=author&amp;query=Weber%2C+J+K">Jeffrey K. Weber</a>, <a href="/search/cs?searchtype=author&amp;query=Mota%2C+Y+C">Yailin Campos Mota</a>, <a href="/search/cs?searchtype=author&amp;query=Ozery-Flato%2C+M">Michal Ozery-Flato</a>, <a href="/search/cs?searchtype=author&amp;query=Sautto%2C+G+A">Giuseppe A. Sautto</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00694v1-abstract-short" style="display: inline;"> Monoclonal antibodies (mAbs) represent one of the most prevalent FDA-approved modalities for treating autoimmune diseases, infectious diseases, and cancers. However, discovery and development of therapeutic antibodies remains a time-consuming and expensive process. Recent advancements in machine learning (ML) and artificial intelligence (AI) have shown significant promise in revolutionizing antibo&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00694v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00694v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00694v1-abstract-full" style="display: none;"> Monoclonal antibodies (mAbs) represent one of the most prevalent FDA-approved modalities for treating autoimmune diseases, infectious diseases, and cancers. 
However, discovery and development of therapeutic antibodies remains a time-consuming and expensive process. Recent advancements in machine learning (ML) and artificial intelligence (AI) have shown significant promise in revolutionizing antibody discovery and optimization. In particular, models that predict antibody biological activity enable in-silico evaluation of binding and functional properties; such models can prioritize antibodies with the highest likelihoods of success in costly and time-intensive laboratory testing procedures. We here explore an AI model for predicting the binding and receptor blocking activity of antibodies against influenza A hemagglutinin (HA) antigens. Our present model is developed with the MAMMAL framework for biologics discovery to predict antibody-antigen interactions using only sequence information. To evaluate the model&#39;s performance, we tested it under various data split conditions to mimic real-world scenarios. Our models achieved an AUROC $\geq$ 0.91 for predicting the activity of existing antibodies against seen HAs and an AUROC of 0.9 for unseen HAs. For novel antibody activity prediction, the AUROC was 0.73, which further declined to 0.63-0.66 under stringent constraints on similarity to existing antibodies. These results demonstrate the potential of AI foundation models to transform antibody design by reducing dependence on extensive laboratory testing and enabling more efficient prioritization of antibody candidates. Moreover, our findings emphasize the critical importance of diverse and comprehensive antibody datasets to improve the generalization of prediction models, particularly for novel antibody development. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00694v1-abstract-full').style.display = 'none'; document.getElementById('2502.00694v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00085">arXiv:2502.00085</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.00085">pdf</a>, <a href="https://arxiv.org/format/2502.00085">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Efficient Beam Search for Large Language Models Using Trie-Based Decoding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Chan%2C+B+J">Brian J Chan</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+J">Jui-Hung Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+M+X">Mao Xun Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+C">Chao-Ting Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+H">Hen-Hsen Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00085v1-abstract-short" style="display: inline;"> In Transformer-based sequence-to-sequence generation, beam search has proven effective in enhancing the quality of generated sequences compared to greedy decoding. Conventional beam search methods typically adopt either a sequential or batch-based approach. The sequential approach, while memory-efficient, requires multiple decoding passes to construct a complete search tree, leading to significant&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00085v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00085v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00085v1-abstract-full" style="display: none;"> In Transformer-based sequence-to-sequence generation, beam search has proven effective in enhancing the quality of generated sequences compared to greedy decoding. Conventional beam search methods typically adopt either a sequential or batch-based approach. The sequential approach, while memory-efficient, requires multiple decoding passes to construct a complete search tree, leading to significantly slower inference. On the other hand, the batch-based approach enables parallel computation across beams, but at the expense of high memory consumption due to the need to maintain separate key-value (KV) caches for each beam. In this study, we introduce a novel trie (prefix-tree)-based parallel decoding method that addresses the memory inefficiency of batch-based beam search. By sharing a single KV cache among all beams that share the same prefix, the proposed method not only reduces memory consumption dramatically but also enables parallel decoding across all branches. This innovative use of a prefix tree offers an efficient alternative for beam search, achieving significant memory savings while preserving inference speed, making it particularly well-suited for memory-constrained environments or large-scale model deployments. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00085v1-abstract-full').style.display = 'none'; document.getElementById('2502.00085v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.18778">arXiv:2501.18778</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.18778">pdf</a>, <a href="https://arxiv.org/format/2501.18778">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3706598.3713500">10.1145/3706598.3713500 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Beyond Automation: How UI/UX Designers Perceive AI as a Creative Partner in the Divergent Thinking Stages </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Khan%2C+A">Abidullah Khan</a>, <a href="/search/cs?searchtype=author&amp;query=Shokrizadeh%2C+A">Atefeh Shokrizadeh</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+J">Jinghui Cheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.18778v1-abstract-short" style="display: inline;"> Divergent thinking activities, like research and ideation, are key drivers of innovation in UI/UX design. Existing research has explored AI&#39;s role in automating design tasks, but leaves a critical gap in understanding how AI specifically influences divergent thinking. To address this, we conducted interviews with 19 professional UI/UX designers, examining their use and perception of AI in these cr&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18778v1-abstract-full').style.display = 'inline'; document.getElementById('2501.18778v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.18778v1-abstract-full" style="display: none;"> Divergent thinking activities, like research and ideation, are key drivers of innovation in UI/UX design. Existing research has explored AI&#39;s role in automating design tasks, but leaves a critical gap in understanding how AI specifically influences divergent thinking. To address this, we conducted interviews with 19 professional UI/UX designers, examining their use and perception of AI in these creative activities. We found that in this context, participants valued AI tools that offer greater control over ideation, facilitate collaboration, enhance efficiency to liberate creativity, and align with their visual habits. 
Our results indicated four key roles AI plays in supporting divergent thinking: aiding research, kick-starting creativity, generating design alternatives, and facilitating prototype exploration. Through this study, we provide insights into the evolving role of AI in the less-investigated area of divergent thinking in UI/UX design, offering recommendations for future AI tools that better support design innovation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18778v1-abstract-full').style.display = 'none'; document.getElementById('2501.18778v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, CHI2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.18748">arXiv:2501.18748</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.18748">pdf</a>, <a href="https://arxiv.org/format/2501.18748">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3706598.3713785">10.1145/3706598.3713785 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Dancing With Chains: Ideating Under Constraints With UIDEC in UI/UX Design </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Shokrizadeh%2C+A">Atefeh Shokrizadeh</a>, <a href="/search/cs?searchtype=author&amp;query=Tadjuidje%2C+B+B">Boniface Bahati Tadjuidje</a>, <a href="/search/cs?searchtype=author&amp;query=Kumar%2C+S">Shivam Kumar</a>, <a href="/search/cs?searchtype=author&amp;query=Kamble%2C+S">Sohan Kamble</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+J">Jinghui Cheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.18748v1-abstract-short" style="display: inline;"> UI/UX designers often work under constraints like brand identity, design norms, and industry guidelines. How these constraints impact designers&#39; ideation and exploration processes should be addressed in creativity-support tools for design. Through an exploratory interview study, we identified three designer personas with varying views on having constraints in the ideation process, which guided the&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18748v1-abstract-full').style.display = 'inline'; document.getElementById('2501.18748v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.18748v1-abstract-full" style="display: none;"> UI/UX designers often work under constraints like brand identity, design norms, and industry guidelines. 
How these constraints impact designers&#39; ideation and exploration processes should be addressed in creativity-support tools for design. Through an exploratory interview study, we identified three designer personas with varying views on having constraints in the ideation process, which guided the creation of UIDEC, a GenAI-powered tool for supporting creativity under constraints. UIDEC allows designers to specify project details, such as purpose, target audience, industry, and design styles, based on which it generates diverse design examples that adhere to these constraints, with minimal need to write prompts. In a user evaluation involving designers representing the identified personas, participants found UIDEC compatible with their existing ideation process and useful for creative inspiration, especially when starting new projects. Our work provides design implications to AI-powered tools that integrate constraints during UI/UX design ideation to support creativity. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18748v1-abstract-full').style.display = 'none'; document.getElementById('2501.18748v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">23 pages, 8 figures, CHI 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.14224">arXiv:2501.14224</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.14224">pdf</a>, <a href="https://arxiv.org/format/2501.14224">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Top Ten Challenges Towards Agentic Neural Graph Databases </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Bai%2C+J">Jiaxin Bai</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Z">Zihao Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+Y">Yukun Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Yin%2C+H">Hang Yin</a>, <a href="/search/cs?searchtype=author&amp;query=Fei%2C+W">Weizhi Fei</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+Q">Qi Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Deng%2C+Z">Zheye Deng</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+J">Jiayang Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+T">Tianshi Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Tsang%2C+H+T">Hong Ting Tsang</a>, <a href="/search/cs?searchtype=author&amp;query=Gao%2C+Y">Yisen Gao</a>, <a href="/search/cs?searchtype=author&amp;query=Xie%2C+Z">Zhongwei Xie</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Y">Yufei Li</a>, <a href="/search/cs?searchtype=author&amp;query=Fan%2C+L">Lixin Fan</a>, <a 
href="/search/cs?searchtype=author&amp;query=Yuan%2C+B">Binhang Yuan</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+W">Wei Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+L">Lei Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+X">Xiaofang Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+Y">Yangqiu Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.14224v1-abstract-short" style="display: inline;"> Graph databases (GDBs) like Neo4j and TigerGraph excel at handling interconnected data but lack advanced inference capabilities. Neural Graph Databases (NGDBs) address this by integrating Graph Neural Networks (GNNs) for predictive analysis and reasoning over incomplete or noisy data. However, NGDBs rely on predefined queries and lack autonomy and adaptability. This paper introduces Agentic Neural&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14224v1-abstract-full').style.display = 'inline'; document.getElementById('2501.14224v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.14224v1-abstract-full" style="display: none;"> Graph databases (GDBs) like Neo4j and TigerGraph excel at handling interconnected data but lack advanced inference capabilities. Neural Graph Databases (NGDBs) address this by integrating Graph Neural Networks (GNNs) for predictive analysis and reasoning over incomplete or noisy data. However, NGDBs rely on predefined queries and lack autonomy and adaptability. This paper introduces Agentic Neural Graph Databases (Agentic NGDBs), which extend NGDBs with three core functionalities: autonomous query construction, neural query execution, and continuous learning. We identify ten key challenges in realizing Agentic NGDBs: semantic unit representation, abductive reasoning, scalable query execution, and integration with foundation models like large language models (LLMs). By addressing these challenges, Agentic NGDBs can enable intelligent, self-improving systems for modern data-driven applications, paving the way for adaptable and autonomous data management solutions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14224v1-abstract-full').style.display = 'none'; document.getElementById('2501.14224v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
Comments: 12 Pages

arXiv:2501.12385 (https://arxiv.org/abs/2501.12385) [cs.SD, cs.LG, eess.AS]
Audio Texture Manipulation by Exemplar-Based Analogy
Authors: Kan Jen Cheng, Tingle Li, Gopala Anumanchipalli
Abstract: Audio texture manipulation involves modifying the perceptual characteristics of a sound to achieve specific transformations, such as adding, removing, or replacing auditory elements. In this paper, we propose an exemplar-based analogy model for audio texture manipulation. Instead of conditioning on text-based instructions, our method uses paired speech examples, where one clip represents the original sound and another illustrates the desired transformation. The model learns to apply the same transformation to new input, allowing for the manipulation of sound textures. We construct a quadruplet dataset representing various editing tasks, and train a latent diffusion model in a self-supervised manner. We show through quantitative evaluations and perceptual studies that our model outperforms text-conditioned baselines and generalizes to real-world, out-of-distribution, and non-speech scenarios.
Project page: https://berkeley-speech-group.github.io/audio-texture-analogy/
Submitted 21 January, 2025; originally announced January 2025.
Comments: ICASSP 2025
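The exemplar-based analogy setup above (a transformation inferred from a paired example, then applied to a new clip) can be illustrated with a crude DSP baseline: transfer the exemplar pair's per-frequency magnitude ratio onto the new input via STFT. This is a minimal sketch, not the paper's latent diffusion model; the signal names, smoothing choice, and toy signals are illustrative assumptions.

import numpy as np
from scipy.signal import stft, istft

def analogy_transfer(src, src_edited, new_input, fs=16000, nperseg=512):
    """Naive exemplar-based analogy: estimate a per-frequency magnitude
    ratio from the exemplar pair (src -> src_edited) and apply it to a
    new clip. A stand-in illustration, not the paper's learned model."""
    _, _, S = stft(src, fs=fs, nperseg=nperseg)
    _, _, E = stft(src_edited, fs=fs, nperseg=nperseg)
    _, _, N = stft(new_input, fs=fs, nperseg=nperseg)
    eps = 1e-8
    # Time-averaged magnitude ratio = the "transformation" in the pair.
    ratio = (np.abs(E).mean(axis=1) + eps) / (np.abs(S).mean(axis=1) + eps)
    # Apply the ratio to the new clip's magnitudes, keeping its phase.
    _, out = istft(N * ratio[:, None], fs=fs, nperseg=nperseg)
    return out

# Toy usage: the "edit" boosts high frequencies; the transfer mimics it.
t = np.linspace(0, 1, 16000, endpoint=False)
src = np.sin(2 * np.pi * 440 * t) + 0.3 * np.sin(2 * np.pi * 4000 * t)
src_edited = np.sin(2 * np.pi * 440 * t) + 0.9 * np.sin(2 * np.pi * 4000 * t)
new_input = np.sin(2 * np.pi * 523 * t) + 0.3 * np.sin(2 * np.pi * 4000 * t)
out = analogy_transfer(src, src_edited, new_input)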
arXiv:2501.11854 (https://arxiv.org/abs/2501.11854) [eess.IV, cs.CV]
WaveNet-SF: A Hybrid Network for Retinal Disease Detection Based on Wavelet Transform in the Spatial-Frequency Domain
Authors: Jilan Cheng, Guoli Long, Zeyu Zhang, Zhenjia Qi, Hanyu Wang, Libin Lu, Shuihua Wang, Yudong Zhang, Jin Hong
Abstract: Retinal diseases are a leading cause of vision impairment and blindness, with timely diagnosis being critical for effective treatment. Optical Coherence Tomography (OCT) has become a standard imaging modality for retinal disease diagnosis, but OCT images often suffer from issues such as speckle noise, complex lesion shapes, and varying lesion sizes, making interpretation challenging. In this paper, we propose a novel framework, WaveNet-SF, to enhance retinal disease detection by integrating spatial-domain and frequency-domain learning. The framework utilizes wavelet transforms to decompose OCT images into low- and high-frequency components, enabling the model to extract both global structural features and fine-grained details. To improve lesion detection, we introduce a multi-scale wavelet spatial attention (MSW-SA) module, which enhances the model's focus on regions of interest at multiple scales. Additionally, a high-frequency feature compensation block (HFFC) is incorporated to recover edge information lost during wavelet decomposition, suppress noise, and preserve fine details crucial for lesion detection. Our approach achieves state-of-the-art (SOTA) classification accuracies of 97.82% and 99.58% on the OCT-C8 and OCT2017 datasets, respectively, surpassing existing methods. These results demonstrate the efficacy of WaveNet-SF in addressing the challenges of OCT image analysis and its potential as a powerful tool for retinal disease diagnosis.
Submitted 20 January, 2025; originally announced January 2025.
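The spatial-frequency split WaveNet-SF builds on is a standard 2D discrete wavelet transform: one level of decomposition yields a low-frequency approximation plus horizontal, vertical, and diagonal detail bands. A minimal sketch with PyWavelets; the wavelet choice and the random stand-in image are assumptions, and the paper's MSW-SA and HFFC modules are not reproduced here.

import numpy as np
import pywt  # PyWavelets

# Stand-in for an OCT B-scan; any 2D grayscale array works.
image = np.random.rand(256, 256).astype(np.float32)

# One level of the 2D discrete wavelet transform ('haar' for clarity).
# cA: low-frequency approximation (global structure);
# cH/cV/cD: high-frequency detail bands (edges, fine lesion detail).
cA, (cH, cV, cD) = pywt.dwt2(image, "haar")
print(cA.shape, cH.shape, cV.shape, cD.shape)  # (128, 128) for each band

# A network can process cA and the detail bands in separate branches,
# as the abstract describes. Zeroing the detail bands on reconstruction
# shows what is lost without high-frequency compensation.
low_only = pywt.idwt2((cA, (None, None, None)), "haar")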
arXiv:2501.11748 (https://arxiv.org/abs/2501.11748) [cs.SE, cs.HC]
DOI: 10.1145/3711105 (https://doi.org/10.1145/3711105)
Who is to Blame: A Comprehensive Review of Challenges and Opportunities in Designer-Developer Collaboration
Authors: Shutong Zhang, Tianyu Zhang, Jinghui Cheng, Shurui Zhou
Abstract: Software development relies on effective collaboration between Software Development Engineers (SDEs) and User eXperience Designers (UXDs) to create software products of high quality and usability. While this collaboration issue has been explored over the past decades, anecdotal evidence continues to indicate the existence of challenges in their collaborative efforts. To understand this gap, we first conducted a systematic literature review (SLR) of 45 papers published since 2004, uncovering three key collaboration challenges and two main categories of potential best practices. We then analyzed designer and developer forums and discussions from one open-source software repository to assess how the challenges and practices manifest in the status quo. Our findings have broad applicability for collaboration in software development, extending beyond the partnership between SDEs and UXDs. The suggested best practices and interventions also act as a reference for future research, assisting in the development of dedicated collaboration tools for SDEs and UXDs.
Submitted 20 January, 2025; originally announced January 2025.

arXiv:2501.11299 (https://arxiv.org/abs/2501.11299) [cs.CV]
MIFNet: Learning Modality-Invariant Features for Generalizable Multimodal Image Matching
Authors: Yepeng Liu, Zhichao Sun, Baosheng Yu, Yitian Zhao, Bo Du, Yongchao Xu, Jun Cheng
Abstract: Many keypoint detection and description methods have been proposed for image matching or registration. While these methods demonstrate promising performance for single-modality image matching, they often struggle with multimodal data because the descriptors trained on single-modality data tend to lack robustness against the non-linear variations present in multimodal data. Extending such methods to multimodal image matching often requires well-aligned multimodal data to learn modality-invariant descriptors. However, acquiring such data is often costly and impractical in many real-world scenarios. To address this challenge, we propose a modality-invariant feature learning network (MIFNet) to compute modality-invariant features for keypoint descriptions in multimodal image matching using only single-modality training data. Specifically, we propose a novel latent feature aggregation module and a cumulative hybrid aggregation module to enhance the base keypoint descriptors trained on single-modality data by leveraging pre-trained features from Stable Diffusion models. We validate our method with recent keypoint detection and description methods in three multimodal retinal image datasets (CF-FA, CF-OCT, EMA-OCTA) and two remote sensing datasets (Optical-SAR and Optical-NIR).
Extensive experiments demonstrate that the proposed MIFNet is able to learn modality-invariant features for multimodal image matching without accessing the targeted modality, and that it has good zero-shot generalization ability. The source code will be made publicly available.
Submitted 20 January, 2025; originally announced January 2025.
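Downstream of any modality-invariant descriptors such as MIFNet's, matching typically reduces to nearest-neighbor search between the two images' descriptor sets. A minimal mutual-nearest-neighbor matcher over L2-normalized descriptors; the random descriptors below are stand-ins, not MIFNet features.

import numpy as np

def mutual_nn_matches(desc_a, desc_b):
    """Return index pairs (i, j) where descriptor i in image A and
    descriptor j in image B are each other's nearest neighbor by
    cosine similarity. Works for any descriptor source."""
    a = desc_a / np.linalg.norm(desc_a, axis=1, keepdims=True)
    b = desc_b / np.linalg.norm(desc_b, axis=1, keepdims=True)
    sim = a @ b.T               # cosine similarity matrix
    nn_ab = sim.argmax(axis=1)  # best B match for each A descriptor
    nn_ba = sim.argmax(axis=0)  # best A match for each B descriptor
    return [(i, j) for i, j in enumerate(nn_ab) if nn_ba[j] == i]

# Stand-in 128-D keypoint descriptors from two modalities.
desc_a = np.random.randn(500, 128)
desc_b = np.random.randn(480, 128)
matches = mutual_nn_matches(desc_a, desc_b)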
arXiv:2501.10499 (https://arxiv.org/abs/2501.10499) [cs.RO]
Learning More With Less: Sample Efficient Dynamics Learning and Model-Based RL for Loco-Manipulation
Authors: Benjamin Hoffman, Jin Cheng, Chenhao Li, Stelian Coros
Abstract: Combining the agility of legged locomotion with the capabilities of manipulation, loco-manipulation platforms have the potential to perform complex tasks in real-world applications. To this end, state-of-the-art quadrupeds with attached manipulators, such as the Boston Dynamics Spot, have emerged to provide a capable and robust platform. However, both the complexity of loco-manipulation control and the black-box nature of commercial platforms pose challenges for developing accurate dynamics models and control policies. We address these challenges by developing a hand-crafted kinematic model for a quadruped-with-arm platform and, together with recent advances in Bayesian Neural Network (BNN)-based dynamics learning using physical priors, efficiently learn an accurate dynamics model from data. We then derive control policies for loco-manipulation via model-based reinforcement learning (RL). We demonstrate the effectiveness of this approach on hardware using the Boston Dynamics Spot with a manipulator, accurately performing dynamic end-effector trajectory tracking even in low data regimes.
Submitted 31 January, 2025; v1 submitted 17 January, 2025; originally announced January 2025.
Comments: Master Thesis at ETH Zurich
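A model-based RL loop of the kind sketched in the abstract (learn a dynamics model, then derive control from it) is often exercised with a simple sampling-based planner: roll candidate action sequences through the model, score them, execute the best first action. A generic random-shooting MPC sketch under an assumed toy dynamics function; this is not the thesis' BNN pipeline.

import numpy as np

def random_shooting_mpc(dynamics, cost, state, horizon=10, n_samples=256,
                        action_dim=2, rng=np.random.default_rng(0)):
    """Pick the first action of the lowest-cost sampled action sequence,
    rolled out through a (learned) dynamics model s' = dynamics(s, a)."""
    actions = rng.uniform(-1.0, 1.0, size=(n_samples, horizon, action_dim))
    total = np.zeros(n_samples)
    for k in range(n_samples):
        s = state.copy()
        for t in range(horizon):
            s = dynamics(s, actions[k, t])
            total[k] += cost(s, actions[k, t])
    return actions[total.argmin(), 0]

# Toy stand-ins: double-integrator dynamics and a "reach the origin" cost.
dyn = lambda s, a: s + 0.1 * np.concatenate([s[2:], a])
cost = lambda s, a: np.sum(s[:2] ** 2) + 1e-3 * np.sum(a ** 2)
action = random_shooting_mpc(dyn, cost, state=np.array([1.0, -1.0, 0.0, 0.0]))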
arXiv:2501.08654 (https://arxiv.org/abs/2501.08654) [cs.CV]
ZeroStereo: Zero-shot Stereo Matching from Single Images
Authors: Xianqi Wang, Hao Yang, Gangwei Xu, Junda Cheng, Min Lin, Yong Deng, Jinliang Zang, Yurui Chen, Xin Yang
Abstract: State-of-the-art supervised stereo matching methods have achieved remarkable performance on various benchmarks. However, their generalization to real-world scenarios remains challenging due to the scarcity of annotated real-world stereo data. In this paper, we propose ZeroStereo, a novel stereo image generation pipeline for zero-shot stereo matching. Our approach synthesizes high-quality right images from arbitrary single images by leveraging pseudo disparities generated by a monocular depth estimation model. Unlike previous methods that address occluded regions by filling missing areas with neighboring pixels or random backgrounds, we fine-tune a diffusion inpainting model to recover missing details while preserving semantic structure. Additionally, we propose Training-Free Confidence Generation, which mitigates the impact of unreliable pseudo labels without additional training, and Adaptive Disparity Selection, which ensures a diverse and realistic disparity distribution while preventing excessive occlusion and foreground distortion. Experiments demonstrate that models trained with our pipeline achieve state-of-the-art zero-shot generalization across multiple datasets with only a dataset volume comparable to Scene Flow. Code: https://github.com/Windsrain/ZeroStereo.
Submitted 8 March, 2025; v1 submitted 15 January, 2025; originally announced January 2025.
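The core of this kind of stereo data generation (a right view synthesized from a single image plus a pseudo disparity map) is forward warping: each left-image pixel shifts left by its disparity in the right view, and unfilled pixels become occlusion holes to hand to an inpainter. A minimal sketch; real pipelines add sub-pixel splatting and the diffusion inpainting the abstract describes.

import numpy as np

def forward_warp_right(left, disparity):
    """Synthesize a right view: pixel (y, x) in the left image lands at
    (y, x - d). Returns the warped image and a mask of occlusion holes
    (pixels no source maps to)."""
    h, w = disparity.shape
    right = np.zeros_like(left)
    filled = np.zeros((h, w), dtype=bool)
    for y in range(h):
        xs = np.argsort(disparity[y])  # write far pixels first; NumPy keeps
        xt = np.round(xs - disparity[y, xs]).astype(int)  # the last write,
        ok = (xt >= 0) & (xt < w)      # so nearer (larger-disparity) pixels win
        right[y, xt[ok]] = left[y, xs[ok]]
        filled[y, xt[ok]] = True
    return right, ~filled              # ~filled marks holes to inpaint

# Toy usage: a constant 2-pixel disparity shifts everything left by 2.
left = np.random.rand(4, 8)
right, holes = forward_warp_right(left, np.full((4, 8), 2.0))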
arXiv:2501.08643 (https://arxiv.org/abs/2501.08643) [cs.CV]
MonSter: Marry Monodepth to Stereo Unleashes Power
Authors: Junda Cheng, Longliang Liu, Gangwei Xu, Xianqi Wang, Zhaoxing Zhang, Yong Deng, Jinliang Zang, Yurui Chen, Zhipeng Cai, Xin Yang
Abstract: Stereo matching recovers depth from image correspondences. Existing methods struggle to handle ill-posed regions with limited matching cues, such as occlusions and textureless areas. To address this, we propose MonSter, a novel method that leverages the complementary strengths of monocular depth estimation and stereo matching. MonSter integrates monocular depth and stereo matching into a dual-branch architecture to iteratively improve each other. Confidence-based guidance adaptively selects reliable stereo cues for monodepth scale-shift recovery. The refined monodepth in turn guides stereo effectively in ill-posed regions. Such iterative mutual enhancement enables MonSter to evolve monodepth priors from coarse object-level structures to pixel-level geometry, fully unlocking the potential of stereo matching. As shown in Fig. 1, MonSter ranks 1st across the five most commonly used leaderboards -- SceneFlow, KITTI 2012, KITTI 2015, Middlebury, and ETH3D -- achieving up to 49.5% improvement (Bad 1.0 on ETH3D) over the previous best method. Comprehensive analysis verifies the effectiveness of MonSter in ill-posed regions. In terms of zero-shot generalization, MonSter significantly and consistently outperforms the state of the art across the board. The code is publicly available at: https://github.com/Junda24/MonSter.
Submitted 15 January, 2025; originally announced January 2025.
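The "monodepth scale-shift recovery" the abstract mentions is, in its simplest form, a weighted least-squares fit: find scale s and shift t so that s*mono + t matches the stereo estimate on pixels the confidence mask trusts. A minimal closed-form sketch; the confidence weighting and the disparity/depth convention are simplified assumptions, not MonSter's exact procedure.

import numpy as np

def recover_scale_shift(mono, stereo, confidence):
    """Solve min_{s,t} sum_i w_i (s * mono_i + t - stereo_i)^2 in closed
    form via the weighted normal equations, using confident pixels only."""
    w, x, y = confidence.ravel(), mono.ravel(), stereo.ravel()
    A = np.array([[np.sum(w * x * x), np.sum(w * x)],
                  [np.sum(w * x),     np.sum(w)]])
    b = np.array([np.sum(w * x * y), np.sum(w * y)])
    s, t = np.linalg.solve(A, b)
    return s, t

# Toy check: monodepth off by scale 2 and shift 0.5 is recovered exactly.
mono = np.random.rand(64, 64)
stereo = 2.0 * mono + 0.5
s, t = recover_scale_shift(mono, stereo, np.ones_like(mono))  # ~ (2.0, 0.5)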
arXiv:2501.08466 (https://arxiv.org/abs/2501.08466) [cs.CY, cs.AI]
A Short-Term Predict-Then-Cluster Framework for Meal Delivery Services
Authors: Jingyi Cheng, Shadi Sharif Azadeh
Abstract: Micro-delivery services offer promising solutions for on-demand city logistics, but their success relies on efficient real-time delivery operations and fleet management. On-demand meal delivery platforms seek to optimize real-time operations based on anticipatory insights into citywide demand distributions. To address these needs, this study proposes a short-term predict-then-cluster framework for on-demand meal delivery services. The framework utilizes ensemble-learning methods for point and distributional forecasting with multivariate features, including lagged-dependent inputs to capture demand dynamics. We introduce Constrained K-Means Clustering (CKMC) and Contiguity Constrained Hierarchical Clustering with Iterative Constraint Enforcement (CCHC-ICE) to generate dynamic clusters based on predicted demand and geographical proximity, tailored to user-defined operational constraints. Evaluations of European and Taiwanese case studies demonstrate that the proposed methods outperform traditional time series approaches in both accuracy and computational efficiency. Clustering results demonstrate that the incorporation of distributional predictions effectively addresses demand uncertainties, improving the quality of operational insights. Additionally, a simulation study demonstrates the practical value of short-term demand predictions for proactive strategies, such as idle fleet rebalancing, significantly enhancing delivery efficiency. By addressing demand uncertainties and operational constraints, our predict-then-cluster framework provides actionable insights for optimizing real-time operations.
The approach is adaptable to other on-demand platform-based city logistics and passenger mobility services, promoting sustainable and efficient urban operations.
Submitted 11 January, 2025; originally announced January 2025.
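Constrained k-means of the general kind CKMC builds on can be sketched as ordinary Lloyd iterations with a capacity cap in the assignment step: points are placed greedily into their nearest centroid that still has room. A generic illustration under an assumed maximum-cluster-size constraint, not the paper's exact formulation.

import numpy as np

def capacity_kmeans(points, k, max_size, iters=20,
                    rng=np.random.default_rng(0)):
    """K-means with a hard cap on cluster size. Points with the largest
    gap between their best and second-best centroid are assigned first,
    each to the nearest centroid with remaining capacity."""
    centroids = points[rng.choice(len(points), k, replace=False)]
    for _ in range(iters):
        d = np.linalg.norm(points[:, None] - centroids[None], axis=2)
        best_two = np.partition(d, 1, axis=1)        # (min, 2nd-min) dists
        order = np.argsort(best_two[:, 0] - best_two[:, 1])  # urgent first
        labels = np.empty(len(points), dtype=int)
        load = np.zeros(k, dtype=int)
        for i in order:
            for c in np.argsort(d[i]):               # nearest centroid first
                if load[c] < max_size:
                    labels[i], load[c] = c, load[c] + 1
                    break
        for c in range(k):                           # standard centroid update
            if np.any(labels == c):
                centroids[c] = points[labels == c].mean(axis=0)
    return labels, centroids

# Toy usage: 200 2-D demand points, 4 zones of at most 60 points each.
pts = np.random.default_rng(1).normal(size=(200, 2))
labels, cents = capacity_kmeans(pts, k=4, max_size=60)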
arXiv:2501.05808 (https://arxiv.org/abs/2501.05808) [eess.SY, cs.AI]
Real-Time Integrated Dispatching and Idle Fleet Steering with Deep Reinforcement Learning for A Meal Delivery Platform
Authors: Jingyi Cheng, Shadi Sharif Azadeh
Abstract: To achieve high service quality and profitability, meal delivery platforms like Uber Eats and Grubhub must strategically operate their fleets to ensure timely deliveries for current orders while mitigating the consequential impacts of suboptimal decisions that lead to courier understaffing in the future. This study sets out to solve the real-time order dispatching and idle courier steering problems for a meal delivery platform by proposing a reinforcement learning (RL)-based strategic dual-control framework. To address the inherent sequential nature of these problems, we model both order dispatching and courier steering as Markov Decision Processes. Trained via a deep reinforcement learning (DRL) framework, we obtain strategic policies by leveraging the explicitly predicted demands as part of the inputs. In our dual-control framework, the dispatching and steering policies are iteratively trained in an integrated manner. These forward-looking policies can be executed in real-time and provide decisions while jointly considering the impacts on local and network levels. To enhance dispatching fairness, we propose convolutional deep Q networks to construct fair courier embeddings. To simultaneously rebalance the supply and demand within the service network, we propose to utilize mean-field approximated supply-demand knowledge to reallocate idle couriers at the local level. Utilizing the policies generated by the RL-based strategic dual-control framework, we find that delivery efficiency and the fairness of workload distribution among couriers are improved, and that under-supplied conditions are alleviated within the service network. Our study sheds light on designing an RL-based framework to enable forward-looking real-time operations for meal delivery platforms and other on-demand services.
Submitted 10 January, 2025; originally announced January 2025.
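The dual-control framework above rests on modeling dispatching and steering decisions as MDPs solved with (deep) Q-learning. A toy tabular Q-learning loop on an abstract fleet state makes the update rule explicit; the state encoding, actions, and reward here are invented for illustration and bear no relation to the platform's real features.

import numpy as np

# Toy MDP: state = number of idle couriers in a zone (0..4); action =
# 0 (hold) or 1 (steer one courier in). Reward favors keeping some slack.
N_STATES, N_ACTIONS = 5, 2
rng = np.random.default_rng(0)

def step(s, a):
    s2 = min(N_STATES - 1, s + 1) if a else max(0, s - rng.integers(0, 2))
    return s2, -abs(s2 - 2)        # ideal: two idle couriers on hand

Q = np.zeros((N_STATES, N_ACTIONS))
alpha, gamma, eps = 0.1, 0.95, 0.1
s = 2
for _ in range(5000):
    # Epsilon-greedy exploration over the two actions.
    a = rng.integers(N_ACTIONS) if rng.random() < eps else int(Q[s].argmax())
    s2, r = step(s, a)
    # Q-learning update: move Q(s,a) toward the bootstrapped target.
    Q[s, a] += alpha * (r + gamma * Q[s2].max() - Q[s, a])
    s = s2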
arXiv:2501.02845 (https://arxiv.org/abs/2501.02845) [cs.CV]
HOGSA: Bimanual Hand-Object Interaction Understanding with 3D Gaussian Splatting Based Data Augmentation
Authors: Wentian Qu, Jiahe Li, Jian Cheng, Jian Shi, Chenyu Meng, Cuixia Ma, Hongan Wang, Xiaoming Deng, Yinda Zhang
Abstract: Understanding of bimanual hand-object interaction plays an important role in robotics and virtual reality. However, due to significant occlusions between hands and object as well as the high degree-of-freedom motions, it is challenging to collect and annotate a high-quality, large-scale dataset, which prevents further improvement of bimanual hand-object interaction-related baselines. In this work, we propose a new 3D Gaussian Splatting based data augmentation framework for bimanual hand-object interaction, which is capable of augmenting existing datasets into large-scale photorealistic data with various hand-object poses and viewpoints. First, we use mesh-based 3DGS to model objects and hands, and we design a super-resolution module to deal with the rendering blur caused by multi-resolution input images. Second, we extend the single-hand grasping pose optimization module to bimanual hand-object interaction, generating varied poses that significantly expand the pose distribution of the dataset. Third, we analyze how different aspects of the proposed data augmentation affect the understanding of bimanual hand-object interaction. We perform our data augmentation on two benchmarks, H2O and Arctic, and verify that our method can improve the performance of the baselines.
Submitted 6 January, 2025; originally announced January 2025.
Comments: Accepted by AAAI2025

arXiv:2501.02831 (https://arxiv.org/abs/2501.02831) [cs.CV]
Universal Features Guided Zero-Shot Category-Level Object Pose Estimation
Authors: Wentian Qu, Chenyu Meng, Heng Li, Jian Cheng, Cuixia Ma, Hongan Wang, Xiao Zhou, Xiaoming Deng, Ping Tan
Abstract: Object pose estimation, crucial in computer vision and robotics applications, faces challenges with the diversity of unseen categories. We propose a zero-shot method to achieve category-level 6-DOF object pose estimation, which exploits both 2D and 3D universal features of the input RGB-D image to establish semantic similarity-based correspondences, and which can be extended to unseen categories without additional model fine-tuning. Our method begins by combining efficient 2D universal features to find sparse correspondences between intra-category objects, yielding an initial coarse pose. To handle the correspondence degradation of 2D universal features when the pose deviates much from the target pose, we use an iterative strategy to optimize the pose. Subsequently, to resolve pose ambiguities due to shape differences between intra-category objects, the coarse pose is refined by optimizing with a dense alignment constraint of 3D universal features. Our method outperforms previous methods on the REAL275 and Wild6D benchmarks for unseen categories.
Submitted 6 January, 2025; originally announced January 2025.
Comments: Accepted by AAAI2025
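Given the semantic correspondences the abstract describes, a coarse 6-DOF pose can be recovered with the classic Kabsch/Procrustes alignment of the matched 3D points. A minimal sketch with synthetic correspondences; the paper's iterative refinement and dense 3D-feature alignment are omitted.

import numpy as np

def kabsch(P, Q):
    """Best-fit rotation R and translation t mapping points P onto Q
    (least squares), via SVD of the cross-covariance matrix."""
    cp, cq = P.mean(axis=0), Q.mean(axis=0)
    H = (P - cp).T @ (Q - cq)
    U, _, Vt = np.linalg.svd(H)
    d = np.sign(np.linalg.det(Vt.T @ U.T))   # guard against reflections
    R = Vt.T @ np.diag([1.0, 1.0, d]) @ U.T
    t = cq - R @ cp
    return R, t

# Toy check: recover a known rotation/translation from matched points.
rng = np.random.default_rng(0)
P = rng.normal(size=(50, 3))
angle = 0.7
R_true = np.array([[np.cos(angle), -np.sin(angle), 0.0],
                   [np.sin(angle),  np.cos(angle), 0.0],
                   [0.0, 0.0, 1.0]])
Q = P @ R_true.T + np.array([0.2, -0.1, 0.3])
R, t = kabsch(P, Q)   # ~ (R_true, [0.2, -0.1, 0.3])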