Search | arXiv e-print repository
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 472 results for author: <span class="mathjax">Jiang, C</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Jiang%2C+C">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Jiang, C"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Jiang%2C+C&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Jiang, C"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Jiang%2C+C&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Jiang%2C+C&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Jiang%2C+C&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Jiang%2C+C&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Jiang%2C+C&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Jiang%2C+C&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.14351">arXiv:2502.14351</a> <span> [<a href="https://arxiv.org/pdf/2502.14351">pdf</a>, <a href="https://arxiv.org/ps/2502.14351">ps</a>, <a href="https://arxiv.org/format/2502.14351">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> SegAnyPET: Universal Promptable Segmentation from Positron Emission Tomography Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yichi Zhang</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+L">Le Xue</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Wenbo Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+L">Lanlan Li</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yuchen Liu</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+C">Chen Jiang</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+Y">Yuan Cheng</a>, <a href="/search/cs?searchtype=author&query=Qi%2C+Y">Yuan Qi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.14351v1-abstract-short" style="display: inline;"> Positron Emission Tomography (PET) imaging plays a crucial role in modern medical diagnostics by revealing the metabolic processes within a patient's body, which is essential for quantification of therapy response and monitoring treatment progress. 
However, the segmentation of PET images presents unique challenges due to their lower contrast and less distinct boundaries compared to other structural medical modalities. Recent developments in segmentation foundation models have shown superior versatility across diverse natural image segmentation tasks. Despite the efforts of medical adaptations, these works primarily focus on structural medical images with detailed physiological structural information and exhibit poor generalization ability when adapted to molecular PET imaging. In this paper, we collect and construct PETS-5k, the largest PET segmentation dataset to date, comprising 5,731 three-dimensional whole-body PET images and encompassing over 1.3M 2D images. Based on the established dataset, we develop SegAnyPET, a modality-specific 3D foundation model for universal promptable segmentation from PET images. To address the challenge of discrepant annotation quality of PET images, we adopt a cross prompting confident learning (CPCL) strategy with an uncertainty-guided self-rectification process to robustly learn segmentation from high-quality labeled data and low-quality noisy labeled data. Experimental results demonstrate that SegAnyPET can correctly segment seen and unseen targets using only one or a few prompt points, outperforming state-of-the-art foundation models and task-specific fully supervised models with higher accuracy and strong generalization ability for universal segmentation. As the first foundation model for PET images, we believe that SegAnyPET will advance the applications to various downstream tasks for molecular imaging.
Submitted 20 February, 2025; originally announced February 2025.
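
The abstract's central claim is the prompt-point interface: a 3D mask from one or a few clicked points. A minimal sketch of that interface shape, with a hypothetical stand-in segmenter (simple intensity thresholding around the seed, not the SegAnyPET model):

```python
# Sketch of a point-promptable 3D segmentation interface. ToyPromptableSegmenter
# is a hypothetical stand-in for illustration only: it selects voxels whose
# intensity is close to each prompt point's intensity.
import numpy as np

class ToyPromptableSegmenter:
    def __init__(self, tol: float = 0.15):
        self.tol = tol  # intensity tolerance of the toy rule

    def segment(self, volume: np.ndarray, prompt_points) -> np.ndarray:
        """Return a binary mask from one or a few (z, y, x) prompt points."""
        mask = np.zeros(volume.shape, dtype=bool)
        for z, y, x in prompt_points:
            seed = volume[z, y, x]
            mask |= np.abs(volume - seed) < self.tol  # crude stand-in for a model
        return mask

volume = np.random.rand(16, 64, 64)                 # fake whole-body PET volume
mask = ToyPromptableSegmenter().segment(volume, [(8, 32, 32)])
print(mask.sum(), "voxels selected from a single prompt point")
```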
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11019">arXiv:2502.11019</a> <span> [<a href="https://arxiv.org/pdf/2502.11019">pdf</a>, <a href="https://arxiv.org/format/2502.11019">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Unlocking the Power of Function Vectors for Characterizing and Mitigating Catastrophic Forgetting in Continual Instruction Tuning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jiang%2C+G">Gangwei Jiang</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+C">Caigao Jiang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhaoyi Li</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+S">Siqiao Xue</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+J">Jun Zhou</a>, <a href="/search/cs?searchtype=author&query=Song%2C+L">Linqi Song</a>, <a href="/search/cs?searchtype=author&query=Lian%2C+D">Defu Lian</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+Y">Yin Wei</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11019v1-abstract-short" style="display: inline;"> Catastrophic forgetting (CF) poses a significant challenge in machine learning, where a model forgets previously learned information upon learning new tasks. Despite the advanced capabilities of Large Language Models (LLMs), they continue to face challenges with CF during continual learning. The majority of existing research focuses on analyzing forgetting patterns through a singular training sequ… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11019v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11019v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11019v1-abstract-full" style="display: none;"> Catastrophic forgetting (CF) poses a significant challenge in machine learning, where a model forgets previously learned information upon learning new tasks. Despite the advanced capabilities of Large Language Models (LLMs), they continue to face challenges with CF during continual learning. The majority of existing research focuses on analyzing forgetting patterns through a singular training sequence, thereby overlooking the intricate effects that diverse tasks have on model behavior. Our study explores CF across various settings, discovering that model forgetting is influenced by both the specific training tasks and the models themselves. To this end, we interpret forgetting by examining the function vector (FV), a compact representation of functions in LLMs, offering a model-dependent indicator for the occurrence of CF. Through theoretical and empirical analyses, we demonstrated that CF in LLMs primarily stems from biases in function activation rather than the overwriting of task processing functions. Leveraging these insights, we propose a novel function vector guided training methodology, incorporating a regularization technique to stabilize the FV and mitigate forgetting. 
Empirical tests on four benchmarks confirm the effectiveness of our proposed training method, substantiating our theoretical framework concerning CF and model function dynamics. We plan to make our code publicly accessible in the near future.
Submitted 16 February, 2025; originally announced February 2025.
Comments: 10 pages
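
The abstract does not give the regularizer's exact form, but the idea it describes (stabilize the function vector while fine-tuning on a new task) can be pictured as an auxiliary penalty. The FV dimensionality and the reference-FV setup below are assumptions for illustration:

```python
# Hedged sketch of function-vector (FV) guided training: add a penalty that
# keeps the current FV close to a reference FV saved before the new task, so
# function activation is not biased away during continual instruction tuning.
# The paper's actual FV extraction and regularizer may differ.
import torch

def fv_regularized_loss(task_loss: torch.Tensor,
                        fv_current: torch.Tensor,
                        fv_reference: torch.Tensor,
                        lam: float = 0.1) -> torch.Tensor:
    # Penalize drift of the compact function representation.
    return task_loss + lam * torch.norm(fv_current - fv_reference) ** 2

task_loss = torch.tensor(1.3)
fv_ref = torch.randn(256)                  # FV saved before the new task
fv_now = fv_ref + 0.05 * torch.randn(256)  # FV under the current parameters
print(fv_regularized_loss(task_loss, fv_now, fv_ref))
```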

3. arXiv:2502.09290 (https://arxiv.org/abs/2502.09290) [math.OC (Optimization and Control); cs.LG (Machine Learning); eess.SY (Systems and Control)]
Title: Dynamic Rolling Horizon Optimization for Network-Constrained V2X Value Stacking of Electric Vehicles Under Uncertainties
Authors: Canchen Jiang, Ariel Liebman, Bo Jie, Hao Wang
Abstract: Electric vehicle (EV) coordination can provide significant benefits through vehicle-to-everything (V2X) by interacting with the grid, buildings, and other EVs. This work aims to develop a V2X value-stacking framework, including vehicle-to-building (V2B), vehicle-to-grid (V2G), and energy trading, to maximize economic benefits for residential communities while maintaining distribution voltage. This work also seeks to quantify the impact of prediction errors related to building load, renewable energy, and EV arrivals. A dynamic rolling-horizon optimization (RHO) method is employed to leverage multiple revenue streams and maximize the potential of EV coordination. To address energy uncertainties, including hourly local building load, local photovoltaic (PV) generation, and EV arrivals, this work develops a Transformer-based forecasting model named Gated Recurrent Units-Encoder-Temporal Fusion Decoder (GRU-EN-TFD). The simulation results, using real data from Australia's National Electricity Market and the Independent System Operators in New England and New York in the US, reveal that V2X value stacking can significantly reduce energy costs. The proposed GRU-EN-TFD model outperforms the benchmark forecast model. Uncertainties in EV arrivals have a more substantial impact on value-stacking performance, highlighting the significance of their accurate forecast. This work provides new insights into the dynamic interactions among residential communities, unlocking the full potential of EV batteries.
Submitted 13 February, 2025; originally announced February 2025.
Comments: 21 pages, accepted by Renewable Energy
Journal ref: Renewable Energy, 2025
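
As a concrete picture of the rolling-horizon mechanics the abstract describes (a schematic only; the paper's formulation additionally handles network constraints and multiple value streams), a minimal sketch:

```python
# Generic rolling-horizon optimization loop: at each step, forecast over a
# lookahead window, optimize the whole window, commit only the first control
# action, then roll forward with updated information.
import numpy as np

def forecast(t: int, horizon: int) -> np.ndarray:
    # Stand-in price forecaster (the paper uses a GRU-EN-TFD model).
    return 20 + 5 * np.sin(2 * np.pi * (t + np.arange(horizon)) / 24)

def optimize_window(prices: np.ndarray) -> np.ndarray:
    # Stand-in optimizer: charge (+1) in the cheapest hours, discharge (-1)
    # in the most expensive; a real model enforces voltage/network constraints.
    order = np.argsort(prices)
    plan = np.zeros(len(prices))
    plan[order[: len(prices) // 3]] = +1.0
    plan[order[-(len(prices) // 3):]] = -1.0
    return plan

horizon, committed = 12, []
for t in range(24):
    plan = optimize_window(forecast(t, horizon))
    committed.append(plan[0])       # commit only the first decision, then roll
print("committed schedule:", np.round(committed, 1))
```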

4. arXiv:2502.04066 (https://arxiv.org/abs/2502.04066) [cs.CL (Computation and Language); cs.AI (Artificial Intelligence)]
Title: Predicting Large Language Model Capabilities on Closed-Book QA Tasks Using Only Information Available Prior to Training
Authors: Changhao Jiang, Ming Zhang, Junjie Ye, Xiaoran Fan, Yifei Cao, Jiajun Sun, Zhiheng Xi, Shihan Dou, Yi Dong, Yujiong Shen, Jingqi Tong, Zhen Wang, Tao Liang, Zhihui Fei, Mingyang Wan, Guojun Ma, Qi Zhang, Tao Gui, Xuanjing Huang
Abstract: The GPT-4 technical report from OpenAI suggests that model performance on specific tasks can be predicted prior to training, though methodologies remain unspecified. This approach is crucial for optimizing resource allocation and ensuring data alignment with target tasks. To achieve this vision, we focus on predicting performance on Closed-book Question Answering (CBQA) tasks, which are closely tied to pre-training data and knowledge retention. We address three major challenges: 1) mastering the entire pre-training process, especially data construction; 2) evaluating a model's knowledge retention; and 3) predicting task-specific knowledge retention using only information available prior to training. To tackle these challenges, we pre-train three large language models (i.e., 1.6B, 7B, and 13B) using 560k dollars and 520k GPU hours. We analyze the pre-training data with knowledge triples and assess knowledge retention using established methods. Additionally, we introduce the SMI metric, an information-theoretic measure that quantifies the relationship between pre-training data, model size, and task-specific knowledge retention. Our experiments reveal a strong linear correlation (R² > 0.84) between the SMI metric and the model's accuracy on CBQA tasks across models of varying sizes (i.e., 1.1B, 1.6B, 7B, and 13B). The dataset, model, and code are available at https://github.com/yuhui1038/SMI.
Submitted 6 February, 2025; originally announced February 2025.
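
For readers unfamiliar with the reported statistic, this is the style of check behind "strong linear correlation (R² > 0.84)": fit a line from SMI to accuracy and compute R². All numbers below are made up for illustration; real SMI values come from pre-training data statistics and model size:

```python
# Fit a linear model accuracy ~ SMI and report the coefficient of
# determination R², as in the correlation the abstract reports.
import numpy as np

smi = np.array([0.21, 0.35, 0.52, 0.68])       # hypothetical SMI per model
acc = np.array([0.18, 0.33, 0.49, 0.70])       # hypothetical CBQA accuracy

slope, intercept = np.polyfit(smi, acc, 1)
pred = slope * smi + intercept
r2 = 1 - np.sum((acc - pred) ** 2) / np.sum((acc - acc.mean()) ** 2)
print(f"fit: acc ~ {slope:.2f}*SMI + {intercept:.2f}, R² = {r2:.3f}")
```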
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04043">arXiv:2502.04043</a> <span> [<a href="https://arxiv.org/pdf/2502.04043">pdf</a>, <a href="https://arxiv.org/format/2502.04043">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Probe-Free Low-Rank Activation Intervention </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jiang%2C+C">Chonghe Jiang</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+B">Bao Nguyen</a>, <a href="/search/cs?searchtype=author&query=So%2C+A+M">Anthony Man-Cho So</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+V+A">Viet Anh Nguyen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04043v1-abstract-short" style="display: inline;"> Language models (LMs) can produce texts that appear accurate and coherent but contain untruthful or toxic content. Inference-time interventions that edit the hidden activations have shown promising results in steering the LMs towards desirable generations. Existing activation intervention methods often comprise an activation probe to detect undesirable generation, triggering the activation modific… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04043v1-abstract-full').style.display = 'inline'; document.getElementById('2502.04043v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04043v1-abstract-full" style="display: none;"> Language models (LMs) can produce texts that appear accurate and coherent but contain untruthful or toxic content. Inference-time interventions that edit the hidden activations have shown promising results in steering the LMs towards desirable generations. Existing activation intervention methods often comprise an activation probe to detect undesirable generation, triggering the activation modification to steer subsequent generation. This paper proposes a probe-free intervention method FLORAIN for all attention heads in a specific activation layer. It eliminates the need to train classifiers for probing purposes. The intervention function is parametrized by a sample-wise nonlinear low-rank mapping, which is trained by minimizing the distance between the modified activations and their projection onto the manifold of desirable content. Under specific constructions of the manifold and projection distance, we show that the intervention strategy can be computed efficiently by solving a smooth optimization problem. The empirical results, benchmarked on multiple base models, demonstrate that FLORAIN consistently outperforms several baseline methods in enhancing model truthfulness and quality across generation and multiple-choice tasks. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04043v1-abstract-full').style.display = 'none'; document.getElementById('2502.04043v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by NAACL 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03639">arXiv:2502.03639</a> <span> [<a href="https://arxiv.org/pdf/2502.03639">pdf</a>, <a href="https://arxiv.org/format/2502.03639">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Towards Physical Understanding in Video Generation: A 3D Point Regularization Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yunuo Chen</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+J">Junli Cao</a>, <a href="/search/cs?searchtype=author&query=Kag%2C+A">Anil Kag</a>, <a href="/search/cs?searchtype=author&query=Goel%2C+V">Vidit Goel</a>, <a href="/search/cs?searchtype=author&query=Korolev%2C+S">Sergei Korolev</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+C">Chenfanfu Jiang</a>, <a href="/search/cs?searchtype=author&query=Tulyakov%2C+S">Sergey Tulyakov</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+J">Jian Ren</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.03639v1-abstract-short" style="display: inline;"> We present a novel video generation framework that integrates 3-dimensional geometry and dynamic awareness. To achieve this, we augment 2D videos with 3D point trajectories and align them in pixel space. The resulting 3D-aware video dataset, PointVid, is then used to fine-tune a latent diffusion model, enabling it to track 2D objects with 3D Cartesian coordinates. Building on this, we regularize t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03639v1-abstract-full').style.display = 'inline'; document.getElementById('2502.03639v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03639v1-abstract-full" style="display: none;"> We present a novel video generation framework that integrates 3-dimensional geometry and dynamic awareness. To achieve this, we augment 2D videos with 3D point trajectories and align them in pixel space. The resulting 3D-aware video dataset, PointVid, is then used to fine-tune a latent diffusion model, enabling it to track 2D objects with 3D Cartesian coordinates. Building on this, we regularize the shape and motion of objects in the video to eliminate undesired artifacts, \eg, nonphysical deformation. Consequently, we enhance the quality of generated RGB videos and alleviate common issues like object morphing, which are prevalent in current video models due to a lack of shape awareness. 
With our 3D augmentation and regularization, our model is capable of handling contact-rich scenarios such as task-oriented videos. These videos involve complex interactions of solids, where 3D information is essential for perceiving deformation and contact. Furthermore, our model improves the overall quality of video generation by promoting the 3D consistency of moving objects and reducing abrupt changes in shape and motion.
Submitted 5 February, 2025; originally announced February 2025.
Comments: Project Page: https://snap-research.github.io/PointVidGen/

7. arXiv:2502.03449 (https://arxiv.org/abs/2502.03449) [cs.CV (Computer Vision and Pattern Recognition)]
Title: Dress-1-to-3: Single Image to Simulation-Ready 3D Outfit with Diffusion Prior and Differentiable Physics
Authors: Xuan Li, Chang Yu, Wenxin Du, Ying Jiang, Tianyi Xie, Yunuo Chen, Yin Yang, Chenfanfu Jiang
Abstract: Recent advances in large models have significantly advanced image-to-3D reconstruction. However, the generated models are often fused into a single piece, limiting their applicability in downstream tasks.
This paper focuses on 3D garment generation, a key area for applications like virtual try-on with dynamic garment animations, which require garments to be separable and simulation-ready. We introduce Dress-1-to-3, a novel pipeline that reconstructs physics-plausible, simulation-ready separated garments with sewing patterns and humans from an in-the-wild image. Starting with the image, our approach combines a pre-trained image-to-sewing-pattern generation model for creating coarse sewing patterns with a pre-trained multi-view diffusion model to produce multi-view images. The sewing pattern is further refined using a differentiable garment simulator based on the generated multi-view images. Versatile experiments demonstrate that our optimization approach substantially enhances the geometric alignment of the reconstructed 3D garments and humans with the input image. Furthermore, by integrating a texture generation module and a human motion generation module, we produce customized physics-plausible and realistic dynamic garment demonstrations. Project page: https://dress-1-to-3.github.io/
Submitted 5 February, 2025; originally announced February 2025.
Comments: Project page: https://dress-1-to-3.github.io/

8. arXiv:2501.18367 (https://arxiv.org/abs/2501.18367) [cs.LG (Machine Learning); cs.AI (Artificial Intelligence)]
Title: A Learnable Multi-views Contrastive Framework with Reconstruction Discrepancy for Medical Time-Series
Authors: Yifan Wang, Hongfeng Ai, Ruiqi Li, Maowei Jiang, Cheng Jiang, Chenzhong Li
Abstract: In medical time series disease diagnosis, two key challenges are identified. First, the high annotation cost of medical data leads to overfitting in models trained on label-limited, single-center datasets. To address this, we propose incorporating external data from related tasks and leveraging AE-GAN to extract prior knowledge, providing valuable references for downstream tasks.
Second, many existing studies employ contrastive learning to derive more generalized medical sequence representations for diagnostic tasks, usually relying on manually designed diverse positive and negative sample pairs. However, these approaches are complex, lack generalizability, and fail to adaptively capture disease-specific features across different conditions. To overcome this, we introduce LMCF (Learnable Multi-views Contrastive Framework), a framework that integrates a multi-head attention mechanism and adaptively learns representations from different views through inter-view and intra-view contrastive learning strategies. Additionally, the pre-trained AE-GAN is used to reconstruct discrepancies in the target data as disease probabilities, which are then integrated into the contrastive learning process. Experiments on three target datasets demonstrate that our method consistently outperforms seven other baselines, highlighting its significant impact on healthcare applications such as the diagnosis of myocardial infarction, Alzheimer's disease, and Parkinson's disease.
Submitted 30 January, 2025; originally announced January 2025.
Comments: 15 pages, 6 figures
ACM Class: I.2.6; K.3.6
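
The inter-view contrastive component can be pictured with a standard InfoNCE loss between two views of the same batch. LMCF's learnable views, intra-view term, and reconstruction-discrepancy weighting are not reproduced in this sketch:

```python
# Standard InfoNCE: embeddings of the same sample from two views are pulled
# together; all other pairs in the batch are pushed apart.
import torch
import torch.nn.functional as F

def info_nce(view_a: torch.Tensor, view_b: torch.Tensor, tau: float = 0.1):
    a = F.normalize(view_a, dim=1)
    b = F.normalize(view_b, dim=1)
    logits = a @ b.T / tau                  # (batch, batch) similarity matrix
    targets = torch.arange(len(a))          # positives sit on the diagonal
    return F.cross_entropy(logits, targets)

za, zb = torch.randn(32, 128), torch.randn(32, 128)  # two views, embedded
print(info_nce(za, zb))
```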
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages,6 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2.6; K.3.6 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.16737">arXiv:2501.16737</a> <span> [<a href="https://arxiv.org/pdf/2501.16737">pdf</a>, <a href="https://arxiv.org/ps/2501.16737">ps</a>, <a href="https://arxiv.org/format/2501.16737">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Consistency Diffusion Models for Single-Image 3D Reconstruction with Priors </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jiang%2C+C">Chenru Jiang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+C">Chengrui Zhang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+X">Xi Yang</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+J">Jie Sun</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yifei Zhang</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+B">Bin Dong</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+K">Kaizhu Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.16737v2-abstract-short" style="display: inline;"> This paper delves into the study of 3D point cloud reconstruction from a single image. Our objective is to develop the Consistency Diffusion Model, exploring synergistic 2D and 3D priors in the Bayesian framework to ensure superior consistency in the reconstruction process, a challenging yet critical requirement in this field. Specifically, we introduce a pioneering training framework under diffus… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16737v2-abstract-full').style.display = 'inline'; document.getElementById('2501.16737v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.16737v2-abstract-full" style="display: none;"> This paper delves into the study of 3D point cloud reconstruction from a single image. Our objective is to develop the Consistency Diffusion Model, exploring synergistic 2D and 3D priors in the Bayesian framework to ensure superior consistency in the reconstruction process, a challenging yet critical requirement in this field. Specifically, we introduce a pioneering training framework under diffusion models that brings two key innovations. First, we convert 3D structural priors derived from the initial 3D point cloud as a bound term to increase evidence in the variational Bayesian framework, leveraging these robust intrinsic priors to tightly govern the diffusion training process and bolster consistency in reconstruction. Second, we extract and incorporate 2D priors from the single input image, projecting them onto the 3D point cloud to enrich the guidance for diffusion training. 
Our framework not only sidesteps potential model learning shifts that may arise from directly imposing additional constraints during training but also precisely transposes the 2D priors into the 3D domain. Extensive experimental evaluations reveal that our approach sets new benchmarks in both synthetic and real-world datasets. The code is included with the submission.
Submitted 31 January, 2025; v1 submitted 28 January, 2025; originally announced January 2025.

10. arXiv:2501.16550 (https://arxiv.org/abs/2501.16550) [cs.GR (Graphics); cs.CV (Computer Vision and Pattern Recognition)]
Title: PhysAnimator: Physics-Guided Generative Cartoon Animation
Authors: Tianyi Xie, Yiwei Zhao, Ying Jiang, Chenfanfu Jiang
Abstract: Creating hand-drawn animation sequences is labor-intensive and demands professional expertise. We introduce PhysAnimator, a novel approach for generating physically plausible, anime-stylized animation from static anime illustrations. Our method seamlessly integrates physics-based simulations with data-driven generative models to produce dynamic and visually compelling animations. To capture the fluidity and exaggeration characteristic of anime, we perform image-space deformable body simulations on extracted mesh geometries.
We enhance artistic control by introducing customizable energy strokes and incorporating rigging point support, enabling the creation of tailored animation effects such as wind interactions. Finally, we extract and warp sketches from the simulation sequence, generating a texture-agnostic representation, and employ a sketch-guided video diffusion model to synthesize high-quality animation frames. The resulting animations exhibit temporal consistency and visual plausibility, demonstrating the effectiveness of our method in creating dynamic anime-style animations.
Submitted 27 January, 2025; originally announced January 2025.

11. arXiv:2501.16237 (https://arxiv.org/abs/2501.16237) [cs.LG (Machine Learning); physics.ins-det (Instrumentation and Detectors)]
Title: Application of Structured State Space Models to High energy physics with locality-sensitive hashing
Authors: Cheng Jiang, Sitian Qian
Abstract: Modern high-energy physics (HEP) experiments are increasingly challenged by the vast size and complexity of their datasets, particularly regarding large-scale point cloud processing and long sequences. In this study, to address these challenges, we explore the application of structured state space models (SSMs), proposing one of the first trials to integrate locality-sensitive hashing into either a hybrid or pure Mamba model. Our results demonstrate that pure SSMs could serve as powerful backbones for HEP problems involving tasks for long sequence data with local inductive bias.
By integrating locality-sensitive hashing into Mamba blocks, we achieve significant improvements over traditional backbones in key HEP tasks, surpassing them in inference speed and physics metrics while reducing computational overhead. In key tests, our approach demonstrated promising results, presenting a viable alternative to traditional transformer backbones by significantly reducing FLOPS while maintaining robust performance.
Submitted 27 January, 2025; originally announced January 2025.
Comments: 6 figures, accepted by AISTATS 2025 as poster, camera-ready version to be updated
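
A generic random-projection (SimHash-style) bucketing sketch conveys how locality-sensitive hashing can impose local inductive bias before a sequence model: nearby points tend to share a hash bucket, so sorting by bucket code places them adjacently in the sequence. The scheme below is illustrative, not the paper's exact hashing:

```python
# SimHash-style LSH: each random hyperplane contributes one sign bit; points
# with the same bit pattern land in the same bucket.
import numpy as np

def lsh_buckets(points: np.ndarray, n_planes: int = 8, seed: int = 0):
    rng = np.random.default_rng(seed)
    planes = rng.normal(size=(points.shape[1], n_planes))
    bits = (points @ planes > 0).astype(int)       # sign pattern per point
    codes = bits @ (1 << np.arange(n_planes))      # pack bits into an integer
    order = np.argsort(codes, kind="stable")       # group equal codes together
    return order, codes

pts = np.random.randn(1000, 3)                     # toy detector point cloud
order, codes = lsh_buckets(pts)
print("points reordered so nearby points share buckets:", order[:10])
```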
arXiv:2501.15995 (https://arxiv.org/abs/2501.15995) [cs.LG, cs.DC, cs.NI, eess.SP]
Title: Brain-Inspired Decentralized Satellite Learning in Space Computing Power Networks
Authors: Peng Yang, Ting Wang, Haibin Cai, Yuanming Shi, Chunxiao Jiang, Linling Kuang
Abstract: Satellite networks can collect massive amounts of space information with advanced remote sensing technologies, which is essential for real-time applications such as natural disaster monitoring. However, traditional centralized processing by ground servers suffers a severe timeliness problem caused by the transmission bottleneck of raw data. To this end, Space Computing Power Networks (Space-CPN) have emerged as a promising architecture for coordinating the computing capability of satellites and enabling on-board data processing. Nevertheless, owing to the natural limitations of solar panels, satellite power systems struggle to meet the energy requirements of ever-growing intelligent computation tasks based on artificial neural networks. To tackle this issue, we propose to employ spiking neural networks (SNNs), supported by neuromorphic computing architectures, for on-board data processing; the extreme sparsity of their computation enables high energy efficiency. Furthermore, to train these on-board models effectively, we put forward a decentralized neuromorphic learning framework in which a communication-efficient inter-plane model aggregation method, inspired by RelaySum, is developed. We provide a theoretical analysis characterizing the convergence behavior of the proposed algorithm, which reveals a convergence speed tied to the network diameter. We then formulate a minimum-diameter spanning tree problem on the inter-plane connectivity topology and solve it to further improve learning performance. Extensive experiments demonstrate the superiority of the proposed method over benchmarks.
Submitted 27 January, 2025; originally announced January 2025.
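Because the proven convergence speed degrades with the diameter of the aggregation tree, a small spanning-tree diameter matters. The sketch below shows a standard heuristic for this, assuming an unweighted connectivity graph: take the shallowest BFS tree over all roots, whose diameter is at most twice the graph radius. The paper's actual solver is not reproduced here.

```python
from collections import deque

def bfs_tree(adj, root):
    """BFS spanning tree of an unweighted connected graph: parent
    pointers plus node depths."""
    parent, depth, q = {root: None}, {root: 0}, deque([root])
    while q:
        u = q.popleft()
        for v in adj[u]:
            if v not in parent:
                parent[v], depth[v] = u, depth[u] + 1
                q.append(v)
    return parent, depth

def near_min_diameter_tree(adj):
    """Among all BFS trees, keep the shallowest one; its diameter is
    at most twice the graph radius, a standard approximation for the
    minimum-diameter spanning tree problem."""
    best = None
    for root in adj:
        parent, depth = bfs_tree(adj, root)
        height = max(depth.values())
        if best is None or height < best[0]:
            best = (height, parent)
    return best[1]  # tree as a child -> parent map

# Toy inter-plane topology: a 6-satellite ring with one cross link.
ring = {i: [(i - 1) % 6, (i + 1) % 6] for i in range(6)}
ring[0].append(3); ring[3].append(0)
tree = near_min_diameter_tree(ring)
```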
arXiv:2501.14743 (https://arxiv.org/abs/2501.14743) [cs.DC, cs.LG, cs.PF]
Title: KVDirect: Distributed Disaggregated LLM Inference
Authors: Shiyang Chen, Rain Jiang, Dezhi Yu, Jinlai Xu, Mengyuan Chao, Fanlong Meng, Chenyu Jiang, Wei Xu, Hang Liu
Abstract: Large Language Models (LLMs) have become the new foundation for many applications and are reshaping human society. Disaggregated inference, which separates the prefill and decode stages, is a promising approach to improving hardware utilization and service quality. However, due to inefficient inter-node communication, existing systems restrict disaggregated inference to a single node, limiting resource allocation flexibility and reducing service capacity. This paper introduces KVDirect, which optimizes KV cache transfer to enable distributed disaggregated LLM inference. KVDirect achieves this through the following contributions. First, we propose a novel tensor-centric communication mechanism that reduces the synchronization overhead of traditional distributed GPU systems. Second, we design a custom communication library to support dynamic GPU resource scheduling and efficient KV cache transfer. Third, we introduce a pull-based KV cache transfer strategy that reduces GPU resource idling and improves latency. Finally, we implement KVDirect as an open-source LLM inference framework. Our evaluation demonstrates that KVDirect reduces per-request latency by 55% compared to the baseline across diverse workloads under the same resource constraints.
Submitted 13 December, 2024; originally announced January 2025.
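To make the pull-based idea concrete, here is a minimal queue-based model of the hand-off: the prefill worker only announces that a request's KV cache is ready, and the decode worker fetches it exactly when it can consume it, so neither side blocks on a push. All names here are illustrative; KVDirect itself moves GPU tensors with a custom communication library that this toy does not model.

```python
import queue, threading, time

ready = queue.Queue()   # prefill announces finished requests here
kv_store = {}           # request id -> "KV cache" payload (toy stand-in)

def prefill_worker(n):
    for rid in range(n):
        time.sleep(0.01)                  # stand-in for prefill compute
        kv_store[rid] = f"kv-blocks-{rid}"
        ready.put(rid)                    # announce readiness; do NOT push data

def decode_worker(n):
    for _ in range(n):
        rid = ready.get()                 # pull exactly when ready to decode
        kv = kv_store.pop(rid)            # fetch the KV cache on demand
        print(f"decoding request {rid} with {kv}")

t = threading.Thread(target=prefill_worker, args=(4,))
t.start(); decode_worker(4); t.join()
```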
arXiv:2501.13975 (https://arxiv.org/abs/2501.13975) [cs.CV, cs.GR]
Title: 3DGS$^2$: Near Second-order Converging 3D Gaussian Splatting
Authors: Lei Lan, Tianjia Shao, Zixuan Lu, Yu Zhang, Chenfanfu Jiang, Yin Yang
Abstract: 3D Gaussian Splatting (3DGS) has emerged as a mainstream solution for novel view synthesis and 3D reconstruction. By explicitly encoding a 3D scene with a collection of Gaussian kernels, 3DGS achieves high-quality rendering with superior efficiency. As a learning-based approach, 3DGS training has typically been handled with the standard stochastic gradient descent (SGD) method, which offers at most linear convergence; consequently, training often requires tens of minutes even with GPU acceleration. This paper introduces a (near) second-order convergent training algorithm for 3DGS that leverages its unique properties. Our approach is inspired by two key observations. First, the attributes of a Gaussian kernel contribute independently to the image-space loss, which permits isolated and local optimization. We exploit this by splitting the optimization at the level of individual kernel attributes, analytically constructing small Newton systems for each parameter group and solving them efficiently on GPU threads. This achieves Newton-like convergence per training image without relying on the global Hessian. Second, kernels exhibit sparse and structured coupling across input images, which allows us to use spatial information to mitigate overshoot during stochastic training. Our method converges an order of magnitude faster than standard GPU-based 3DGS training, requiring over $10\times$ fewer iterations while matching or surpassing the quality of SGD-based 3DGS reconstructions.
Submitted 27 January, 2025; v1 submitted 22 January, 2025; originally announced January 2025.
Comments: 11 pages.
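The per-attribute splitting boils down to solving many tiny damped (Gauss-)Newton systems instead of one global one. A minimal sketch, assuming a residual Jacobian J for a single attribute group (the paper constructs these systems analytically; the damping constant here is an assumption):

```python
import numpy as np

def local_newton_step(J, r, damping=1e-3):
    """Damped Gauss-Newton update for one kernel-attribute group:
    solve (J^T J + damping * I) d = -J^T r. Each system has only a
    few unknowns, so a direct solve is cheap and maps naturally onto
    a single GPU thread."""
    n = J.shape[1]
    H = J.T @ J + damping * np.eye(n)   # small local Hessian approximation
    g = J.T @ r                         # local gradient
    return np.linalg.solve(H, -g)

# Toy: a 3-parameter group observed through 8 image-space residuals.
rng = np.random.default_rng(0)
J, r = rng.normal(size=(8, 3)), rng.normal(size=8)
delta = local_newton_step(J, r)         # Newton-like update for this group
```

Because each system is independent, thousands of such solves can run in parallel, which is what makes the approach practical on a GPU.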
href="/search/cs?searchtype=author&query=Wei%2C+G">Guangda Wei</a>, <a href="/search/cs?searchtype=author&query=Lai%2C+G">Guokun Lai</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+H">Haiqing Guo</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+H">Han Zhu</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+H">Hao Ding</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+H">Hao Hu</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+H">Hao Yang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Hao Zhang</a> , et al. (69 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.12599v1-abstract-short" style="display: inline;"> Language model pretraining with next token prediction has proved effective for scaling compute but is limited to the amount of available training data. Scaling reinforcement learning (RL) unlocks a new axis for the continued improvement of artificial intelligence, with the promise that large language models (LLMs) can scale their training data by learning to explore with rewards. However, prior pu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.12599v1-abstract-full').style.display = 'inline'; document.getElementById('2501.12599v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.12599v1-abstract-full" style="display: none;"> Language model pretraining with next token prediction has proved effective for scaling compute but is limited to the amount of available training data. Scaling reinforcement learning (RL) unlocks a new axis for the continued improvement of artificial intelligence, with the promise that large language models (LLMs) can scale their training data by learning to explore with rewards. However, prior published work has not produced competitive results. In light of this, we report on the training practice of Kimi k1.5, our latest multi-modal LLM trained with RL, including its RL training techniques, multi-modal data recipes, and infrastructure optimization. Long context scaling and improved policy optimization methods are key ingredients of our approach, which establishes a simplistic, effective RL framework without relying on more complex techniques such as Monte Carlo tree search, value functions, and process reward models. Notably, our system achieves state-of-the-art reasoning performance across multiple benchmarks and modalities -- e.g., 77.5 on AIME, 96.2 on MATH 500, 94-th percentile on Codeforces, 74.9 on MathVista -- matching OpenAI's o1. Moreover, we present effective long2short methods that use long-CoT techniques to improve short-CoT models, yielding state-of-the-art short-CoT reasoning results -- e.g., 60.8 on AIME, 94.6 on MATH500, 47.3 on LiveCodeBench -- outperforming existing short-CoT models such as GPT-4o and Claude Sonnet 3.5 by a large margin (up to +550%). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.12599v1-abstract-full').style.display = 'none'; document.getElementById('2501.12599v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">25 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.09980">arXiv:2501.09980</a> <span> [<a href="https://arxiv.org/pdf/2501.09980">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Aneumo: A Large-Scale Comprehensive Synthetic Dataset of Aneurysm Hemodynamics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+X">Xigui Li</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Y">Yuanye Zhou</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+F">Feiyang Xiao</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+X">Xin Guo</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yichi Zhang</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+C">Chen Jiang</a>, <a href="/search/cs?searchtype=author&query=Ge%2C+J">Jianchao Ge</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiansheng Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Q">Qimeng Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+T">Taiwei Zhang</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+C">Chensen Lin</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+Y">Yuan Cheng</a>, <a href="/search/cs?searchtype=author&query=Qi%2C+Y">Yuan Qi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.09980v1-abstract-short" style="display: inline;"> Intracranial aneurysm (IA) is a common cerebrovascular disease that is usually asymptomatic but may cause severe subarachnoid hemorrhage (SAH) if ruptured. Although clinical practice is usually based on individual factors and morphological features of the aneurysm, its pathophysiology and hemodynamic mechanisms remain controversial. To address the limitations of current research, this study constr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.09980v1-abstract-full').style.display = 'inline'; document.getElementById('2501.09980v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.09980v1-abstract-full" style="display: none;"> Intracranial aneurysm (IA) is a common cerebrovascular disease that is usually asymptomatic but may cause severe subarachnoid hemorrhage (SAH) if ruptured. Although clinical practice is usually based on individual factors and morphological features of the aneurysm, its pathophysiology and hemodynamic mechanisms remain controversial. To address the limitations of current research, this study constructed a comprehensive hemodynamic dataset of intracranial aneurysms. The dataset is based on 466 real aneurysm models, and 10,000 synthetic models were generated by resection and deformation operations, including 466 aneurysm-free models and 9,534 deformed aneurysm models. 
arXiv:2501.09980 (https://arxiv.org/abs/2501.09980) [cs.CV, cs.AI, cs.LG]
Title: Aneumo: A Large-Scale Comprehensive Synthetic Dataset of Aneurysm Hemodynamics
Authors: Xigui Li, Yuanye Zhou, Feiyang Xiao, Xin Guo, Yichi Zhang, Chen Jiang, Jianchao Ge, Xiansheng Wang, Qimeng Wang, Taiwei Zhang, Chensen Lin, Yuan Cheng, Yuan Qi
Abstract: Intracranial aneurysm (IA) is a common cerebrovascular disease that is usually asymptomatic but may cause severe subarachnoid hemorrhage (SAH) if ruptured. Although clinical practice is usually based on individual factors and morphological features of the aneurysm, its pathophysiology and hemodynamic mechanisms remain controversial. To address the limitations of current research, this study constructed a comprehensive hemodynamic dataset of intracranial aneurysms. The dataset is based on 466 real aneurysm models; 10,000 synthetic models were generated by resection and deformation operations, comprising 466 aneurysm-free models and 9,534 deformed aneurysm models. The dataset also provides medical-image-like segmentation mask files to support insightful analysis. In addition, the dataset contains hemodynamic data measured at eight steady-state flow rates (0.001 to 0.004 kg/s), including critical parameters such as flow velocity, pressure, and wall shear stress, providing a valuable resource for investigating aneurysm pathogenesis and clinical prediction. This dataset will help advance the understanding of the pathologic features and hemodynamic mechanisms of intracranial aneurysms and support in-depth research in related fields. Dataset hosted at https://github.com/Xigui-Li/Aneumo.
Submitted 17 January, 2025; originally announced January 2025.
arXiv:2501.07767 (https://arxiv.org/abs/2501.07767) [cs.AR, cs.DC] DOI: 10.1109/MICRO61859.2024.00116 (https://doi.org/10.1109/MICRO61859.2024.00116)
Title: HgPCN: A Heterogeneous Architecture for E2E Embedded Point Cloud Inference
Authors: Yiming Gao, Chao Jiang, Wesley Piard, Xiangru Chen, Bhavesh Patel, Herman Lam
Abstract: Point clouds are an important type of geometric data structure for many embedded applications such as autonomous driving and augmented reality. Current Point Cloud Networks (PCNs) have achieved great success in using inference to perform point cloud analysis, including object part segmentation and shape classification. However, point cloud applications on the computing edge require more than the inference step alone: they require end-to-end (E2E) processing of point cloud workloads, comprising pre-processing of raw data, input preparation, and inference. Current PCN approaches to end-to-end processing cannot meet the real-time latency requirement on the edge, i.e., the ability of the AI service to keep up with the rate at which 3D sensors generate raw data. End-to-end latency stems from two sources: memory-intensive down-sampling in the pre-processing phase, and the data-structuring step for input preparation in the inference phase. In this paper, we present HgPCN, an end-to-end heterogeneous architecture for real-time embedded point cloud applications. In HgPCN, we introduce two novel methodologies based on spatial indexing to address the two identified bottlenecks. In the Pre-processing Engine, an Octree-Indexed-Sampling method optimizes the memory-intensive down-sampling bottleneck of the pre-processing phase. In the Inference Engine, HgPCN extends a commercial DLA with a customized Data Structuring Unit based on a Voxel-Expanded Gathering method to fundamentally reduce the workload of the data-structuring step in the inference phase.
Submitted 13 January, 2025; originally announced January 2025.
Comments: Accepted by MICRO 2024.
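The spatial-indexing idea behind the sampling bottleneck can be approximated in a few lines of software: hash every point to its octree cell at a fixed depth and keep one representative per occupied cell, avoiding the pairwise-distance passes of farthest-point sampling. This sketch is an assumption for illustration; the paper's Octree-Indexed-Sampling is a more elaborate hardware method.

```python
import numpy as np

def octree_indexed_sample(points, depth=3):
    """Down-sample by assigning each point to its octree cell at a
    fixed depth and keeping one representative per occupied cell."""
    lo, hi = points.min(axis=0), points.max(axis=0)
    cells = ((points - lo) / (hi - lo + 1e-9) * (2 ** depth)).astype(np.int64)
    cells = np.clip(cells, 0, 2 ** depth - 1)
    # Pack the (x, y, z) cell indices into a single integer key.
    keys = (cells[:, 0] << (2 * depth)) | (cells[:, 1] << depth) | cells[:, 2]
    _, first = np.unique(keys, return_index=True)
    return points[np.sort(first)]

pts = np.random.default_rng(2).uniform(size=(5000, 3))
sampled = octree_indexed_sample(pts, depth=3)   # at most 8**3 = 512 points
```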
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by MICRO2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.06244">arXiv:2501.06244</a> <span> [<a href="https://arxiv.org/pdf/2501.06244">pdf</a>, <a href="https://arxiv.org/format/2501.06244">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Microservice Deployment in Space Computing Power Networks via Robust Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yu%2C+Z">Zhiyong Yu</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Y">Yuning Jiang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+X">Xin Liu</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+Y">Yuanming Shi</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+C">Chunxiao Jiang</a>, <a href="/search/cs?searchtype=author&query=Kuang%2C+L">Linling Kuang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.06244v1-abstract-short" style="display: inline;"> With the growing demand for Earth observation, it is important to provide reliable real-time remote sensing inference services to meet the low-latency requirements. The Space Computing Power Network (Space-CPN) offers a promising solution by providing onboard computing and extensive coverage capabilities for real-time inference. This paper presents a remote sensing artificial intelligence applicat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.06244v1-abstract-full').style.display = 'inline'; document.getElementById('2501.06244v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.06244v1-abstract-full" style="display: none;"> With the growing demand for Earth observation, it is important to provide reliable real-time remote sensing inference services to meet the low-latency requirements. The Space Computing Power Network (Space-CPN) offers a promising solution by providing onboard computing and extensive coverage capabilities for real-time inference. This paper presents a remote sensing artificial intelligence applications deployment framework designed for Low Earth Orbit satellite constellations to achieve real-time inference performance. The framework employs the microservice architecture, decomposing monolithic inference tasks into reusable, independent modules to address high latency and resource heterogeneity. This distributed approach enables optimized microservice deployment, minimizing resource utilization while meeting quality of service and functional requirements. We introduce Robust Optimization to the deployment problem to address data uncertainty. 
Additionally, we model the Robust Optimization problem as a Partially Observable Markov Decision Process and propose a robust reinforcement learning algorithm to handle the semi-infinite Quality of Service constraints. Our approach yields sub-optimal solutions that minimize accuracy loss while maintaining acceptable computational costs. Simulation results demonstrate the effectiveness of our framework. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.06244v1-abstract-full').style.display = 'none'; document.getElementById('2501.06244v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.05107">arXiv:2501.05107</a> <span> [<a href="https://arxiv.org/pdf/2501.05107">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applied Physics">physics.app-ph</span> </div> </div> <p class="title is-5 mathjax"> Harnessing the Power of Vibration Motors to Develop Miniature Untethered Robotic Fishes </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jiang%2C+C">Chongjie Jiang</a>, <a href="/search/cs?searchtype=author&query=Dai%2C+Y">Yingying Dai</a>, <a href="/search/cs?searchtype=author&query=Le%2C+J">Jinyang Le</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xiaomeng Chen</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+Y">Yu Xie</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+W">Wei Zhou</a>, <a href="/search/cs?searchtype=author&query=Niu%2C+F">Fuzhou Niu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Ying Li</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+T">Tao Luo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.05107v1-abstract-short" style="display: inline;"> Miniature underwater robots play a crucial role in the exploration and development of marine resources, particularly in confined spaces and high-pressure deep-sea environments. This study presents the design, optimization, and performance of a miniature robotic fish, powered by the oscillation of bio-inspired fins. These fins feature a rigid-flexible hybrid structure and use an eccentric rotating… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.05107v1-abstract-full').style.display = 'inline'; document.getElementById('2501.05107v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.05107v1-abstract-full" style="display: none;"> Miniature underwater robots play a crucial role in the exploration and development of marine resources, particularly in confined spaces and high-pressure deep-sea environments. 
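For context on what "handling a QoS constraint" inside a learning loop can look like, the toy below runs a generic primal-dual iteration: ascend on reward minus a multiplier times the constraint violation, and raise the multiplier whenever latency exceeds its budget. This is standard constrained-optimization machinery, not the paper's robust POMDP algorithm; every function and constant is illustrative.

```python
# Toy primal-dual iteration: maximize R(x) subject to latency(x) <= budget.

def R(x):        # stand-in reward surrogate (peak at x = 2)
    return -(x - 2.0) ** 2

def latency(x):  # stand-in QoS metric
    return 0.5 * x

def grad(f, x, h=1e-5):  # numeric gradient keeps the sketch dependency-free
    return (f(x + h) - f(x - h)) / (2 * h)

budget, x, lam = 0.8, 0.0, 0.0
for _ in range(2000):
    x += 0.05 * (grad(R, x) - lam * grad(latency, x))   # primal ascent
    lam = max(0.0, lam + 0.1 * (latency(x) - budget))   # dual ascent on violation
print(round(x, 3), round(lam, 3))  # settles near x = 1.6, the QoS boundary
```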
arXiv:2501.05107 (https://arxiv.org/abs/2501.05107) [cs.RO, physics.app-ph]
Title: Harnessing the Power of Vibration Motors to Develop Miniature Untethered Robotic Fishes
Authors: Chongjie Jiang, Yingying Dai, Jinyang Le, Xiaomeng Chen, Yu Xie, Wei Zhou, Fuzhou Niu, Ying Li, Tao Luo
Abstract: Miniature underwater robots play a crucial role in the exploration and development of marine resources, particularly in confined spaces and high-pressure deep-sea environments. This study presents the design, optimization, and performance of a miniature robotic fish powered by the oscillation of bio-inspired fins. These fins feature a rigid-flexible hybrid structure and use an eccentric rotating mass (ERM) vibration motor as the excitation source, generating high-frequency unidirectional oscillations that induce acoustic streaming for propulsion. The drive mechanism, powered by miniature ERM vibration motors, eliminates the need for complex mechanical drive systems, enabling complete isolation of the drive system from the external environment and facilitating miniaturization. The compact, untethered robotic fish, measuring 85 x 60 x 45 mm^3, is equipped with three bio-inspired fins at the pectoral and caudal positions. Experimental results demonstrate a maximum forward swimming speed of 1.36 body lengths (BL) per second when powered by all fins, and a minimum turning radius of 0.6 BL when powered by a single fin. These results underscore the value of ERM vibration motors in developing highly maneuverable, miniature untethered underwater robots for marine exploration tasks.
Submitted 9 January, 2025; originally announced January 2025.
Comments: 8 pages, 8 figures.
arXiv:2501.01743 (https://arxiv.org/abs/2501.01743) [cs.CL, cs.AI]
Title: Automating Legal Concept Interpretation with LLMs: Retrieval, Generation, and Evaluation
Authors: Kangcheng Luo, Quzhe Huang, Cong Jiang, Yansong Feng
Abstract: Legal articles often include vague concepts for adapting to the ever-changing society. Providing detailed interpretations of these concepts is a critical and challenging task even for legal practitioners. It requires meticulous and professional annotations and summarizations by legal experts, which are admittedly time-consuming and expensive to collect at scale. By emulating legal experts' doctrinal method, we introduce a novel framework, ATRIE, using large language models (LLMs) to AuTomatically Retrieve concept-related information, Interpret legal concepts, and Evaluate generated interpretations, eliminating dependence on legal experts. ATRIE comprises a legal concept interpreter and a legal concept interpretation evaluator. The interpreter uses LLMs to retrieve relevant information from judicial precedents and interpret legal concepts. The evaluator uses performance changes on legal concept entailment, a downstream task we propose, as a proxy of interpretation quality. Automatic and multifaceted human evaluations indicate that the quality of our interpretations is comparable to those written by legal experts, with superior comprehensiveness and readability. Although there remains a slight gap in accuracy, it can already assist legal practitioners in improving the efficiency of concept interpretation.
Submitted 16 February, 2025; v1 submitted 3 January, 2025; originally announced January 2025.
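The evaluator's proxy metric is simple to state in code: score an interpretation by the accuracy gain it produces on the concept-entailment task when it is prepended to the prompt. A minimal sketch, assuming a hypothetical `llm` text-completion callable and a toy labeled case list:

```python
# `llm` is a hypothetical text-completion callable; this stub only
# makes the sketch runnable.
llm = lambda prompt: "yes"

def entailment_accuracy(cases, interpretation=""):
    """Accuracy on (facts, concept-applies?) pairs, optionally with an
    interpretation prepended to the prompt."""
    correct = 0
    for facts, label in cases:
        prompt = (f"Interpretation: {interpretation}\n"
                  f"Facts: {facts}\n"
                  "Does the legal concept apply? Answer yes or no:")
        pred = llm(prompt).strip().lower().startswith("yes")
        correct += (pred == label)
    return correct / len(cases)

def interpretation_quality(cases, interpretation):
    # Proxy: accuracy gain over the no-interpretation baseline.
    return entailment_accuracy(cases, interpretation) - entailment_accuracy(cases)

toy_cases = [("The tenant sublet without consent.", True),
             ("The contract was performed on time.", False)]
print(interpretation_quality(toy_cases, "Consent is required for subletting."))
```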
arXiv:2412.19105 (https://arxiv.org/abs/2412.19105) [cs.HC]
Title: How Can Haptic Feedback Assist People with Blind and Low Vision (BLV): A Systematic Literature Review
Authors: Chutian Jiang, Emily Kuang, Mingming Fan
Abstract: People who are blind or have low vision (BLV) encounter numerous challenges in their daily lives and work. To support them, various haptic assistive tools have been developed. Despite these advancements, the effective utilization of these tools -- including the optimal haptic feedback and on-body stimulation positions for different tasks, along with their limitations -- remains poorly understood. Recognizing these gaps, we conducted a systematic literature review spanning two decades (2004-2024) to evaluate the development of haptic assistive tools within the HCI community. Our findings reveal that these tools are primarily used for understanding graphical information, providing guidance and navigation, and facilitating education and training, among other life and work tasks. We identified three main classes of limitations: hardware, functionality, and UX and evaluation methods. Based on these insights, we discuss potential research avenues and offer suggestions for enhancing the effectiveness of future haptic assistive technologies.
Submitted 26 December, 2024; originally announced December 2024.
arXiv:2412.18697 (https://arxiv.org/abs/2412.18697) [cs.AI, cs.MA]
Title: Agents on the Bench: Large Language Model Based Multi Agent Framework for Trustworthy Digital Justice
Authors: Cong Jiang, Xiaolei Yang
Abstract: The justice system has increasingly employed AI techniques to enhance efficiency, yet limitations remain in improving the quality of decision-making, particularly regarding the transparency and explainability needed to uphold public trust in legal AI. To address these challenges, we propose a large language model based multi-agent framework named AgentsBench, which aims to simultaneously improve both efficiency and quality in judicial decision-making. Our approach leverages multiple LLM-driven agents that simulate the collaborative deliberation and decision-making process of a judicial bench. We conducted experiments on the legal judgment prediction task, and the results show that our framework outperforms existing LLM-based methods in performance and decision quality. By incorporating these elements, our framework reflects real-world judicial processes more closely, enhancing accuracy, fairness, and societal consideration. AgentsBench provides a more nuanced and realistic method for trustworthy AI decision-making, with strong potential for application across various case types and legal scenarios.
Submitted 24 December, 2024; originally announced December 2024.
Comments: Draft version; under review.

arXiv:2412.17739 (https://arxiv.org/abs/2412.17739) [cs.AI, cs.CL]
Title: Fourier Position Embedding: Enhancing Attention's Periodic Extension for Length Generalization
Authors: Ermo Hua, Che Jiang, Xingtai Lv, Kaiyan Zhang, Ning Ding, Youbang Sun, Biqing Qi, Yuchen Fan, Xuekai Zhu, Bowen Zhou
Abstract: Extending the context length of Language Models (LMs) by improving Rotary Position Embedding (RoPE) has become a trend. While existing works mainly address RoPE's limitations within the attention mechanism, this paper provides an analysis across nearly all parts of LMs, uncovering their adverse effects on length generalization for RoPE-based attention. Using Discrete Signal Processing theory, we show that RoPE enables periodic attention by implicitly performing a Non-Uniform Discrete Fourier Transform. However, this periodicity is undermined by spectral damage caused by: 1) linear layers and activation functions outside of attention; and 2) insufficiently trained frequency components brought about by time-domain truncation. Building on these observations, we propose Fourier Position Embedding (FoPE), which enhances attention's frequency-domain properties to improve both its periodic extension and length generalization. FoPE constructs a Fourier series and zeros out the destructive frequency components, increasing model robustness against spectral damage. Experiments across various model scales show that, within varying context windows, FoPE maintains a more stable perplexity and more consistent accuracy on a needle-in-a-haystack task than RoPE and ALiBi. Several analyses and ablations lend further support to our method and theoretical modeling.
Submitted 2 January, 2025; v1 submitted 23 December, 2024; originally announced December 2024.
Comments: 14 pages, 7 figures.
arXiv:2412.17213 (https://arxiv.org/abs/2412.17213) [cs.CR]
Title: Attack by Yourself: Effective and Unnoticeable Multi-Category Graph Backdoor Attacks with Subgraph Triggers Pool
Authors: Jiangtong Li, Dungy Liu, Dawei Cheng, Changchun Jiang
Abstract: Graph Neural Networks (GNNs) have achieved significant success in various real-world applications, including social networks, finance systems, and traffic management. Recent research highlights their vulnerability to backdoor attacks in node classification, where GNNs trained on a poisoned graph misclassify a test node only when specific triggers are attached. These studies typically focus on single attack categories and use adaptive trigger generators to create node-specific triggers. However, adaptive trigger generators typically have a simple structure, limited parameters, and lack category-aware graph knowledge, which makes them struggle to handle backdoor attacks across multiple categories as the number of target categories increases. We address this gap by proposing a novel approach for Effective and Unnoticeable Multi-Category (EUMC) graph backdoor attacks, leveraging subgraphs of the attacked graph as category-aware triggers to precisely control the target category. To ensure effectiveness, we construct a Multi-Category Subgraph Triggers Pool (MC-STP) from subgraphs of the attacked graph. We then exploit the attachment-probability shifts of each subgraph trigger as category-aware priors for determining the target category. Moreover, we develop a "select then attach" strategy that connects a suitable category-aware trigger to each attacked node for unnoticeability. Extensive experiments across different real-world datasets confirm the efficacy of our method in conducting multi-category graph backdoor attacks on various GNN models and defense strategies.
Submitted 22 December, 2024; originally announced December 2024.
Comments: 13 pages, 5 figures.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 5 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.13520">arXiv:2412.13520</a> <span> [<a href="https://arxiv.org/pdf/2412.13520">pdf</a>, <a href="https://arxiv.org/format/2412.13520">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> </div> </div> <p class="title is-5 mathjax"> ROMAS: A Role-Based Multi-Agent System for Database monitoring and Planning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yi Huang</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+F">Fangyin Cheng</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+F">Fan Zhou</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jiahui Li</a>, <a href="/search/cs?searchtype=author&query=Gong%2C+J">Jian Gong</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+H">Hongjun Yang</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+Z">Zhidong Fan</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+C">Caigao Jiang</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+S">Siqiao Xue</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+F">Faqiang Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.13520v1-abstract-short" style="display: inline;"> In recent years, Large Language Models (LLMs) have demonstrated remarkable capabilities in data analytics when integrated with Multi-Agent Systems (MAS). However, these systems often struggle with complex tasks that involve diverse functional requirements and intricate data processing challenges, necessitating customized solutions that lack broad applicability. Furthermore, current MAS fail to emu… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.13520v1-abstract-full').style.display = 'inline'; document.getElementById('2412.13520v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.13520v1-abstract-full" style="display: none;"> In recent years, Large Language Models (LLMs) have demonstrated remarkable capabilities in data analytics when integrated with Multi-Agent Systems (MAS). However, these systems often struggle with complex tasks that involve diverse functional requirements and intricate data processing challenges, necessitating customized solutions that lack broad applicability. Furthermore, current MAS fail to emulate essential human-like traits such as self-planning, self-monitoring, and collaborative work in dynamic environments, leading to inefficiencies and resource wastage. To address these limitations, we propose ROMAS, a novel Role-Based M ulti-A gent System designed to adapt to various scenarios while enabling low code development and one-click deployment. 
ROMAS has been effectively deployed in DB-GPT [Xue et al., 2023a, 2024b], a well-known project utilizing LLM-powered database analytics, showcasing its practical utility in real-world scenarios. By integrating role-based collaborative mechanisms for self-monitoring and self-planning, and by leveraging existing MAS capabilities to enhance database interactions, ROMAS offers a more effective and versatile solution. Experimental evaluations of ROMAS demonstrate its superiority across multiple scenarios, highlighting its potential to advance the field of multi-agent data analytics. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.12129">arXiv:2412.12129</a> <span> [<a href="https://arxiv.org/pdf/2412.12129">pdf</a>, <a href="https://arxiv.org/format/2412.12129">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> SceneDiffuser: Efficient and Controllable Driving Simulation Initialization and Rollout </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jiang%2C+C+M">Chiyu Max Jiang</a>, <a href="/search/cs?searchtype=author&query=Bai%2C+Y">Yijing Bai</a>, <a href="/search/cs?searchtype=author&query=Cornman%2C+A">Andre Cornman</a>, <a href="/search/cs?searchtype=author&query=Davis%2C+C">Christopher Davis</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+X">Xiukun Huang</a>, <a href="/search/cs?searchtype=author&query=Jeon%2C+H">Hong Jeon</a>, <a href="/search/cs?searchtype=author&query=Kulshrestha%2C+S">Sakshum Kulshrestha</a>, <a href="/search/cs?searchtype=author&query=Lambert%2C+J">John Lambert</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S">Shuangyu Li</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+X">Xuanyu Zhou</a>, <a href="/search/cs?searchtype=author&query=Fuertes%2C+C">Carlos Fuertes</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+C">Chang Yuan</a>, <a href="/search/cs?searchtype=author&query=Tan%2C+M">Mingxing Tan</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Y">Yin Zhou</a>, <a href="/search/cs?searchtype=author&query=Anguelov%2C+D">Dragomir Anguelov</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2412.12129v1-abstract-full">
Realistic and interactive scene simulation is a key prerequisite for autonomous vehicle (AV) development. In this work, we present SceneDiffuser, a scene-level diffusion prior designed for traffic simulation. It offers a unified framework that addresses two key stages of simulation: scene initialization, which involves generating initial traffic layouts, and scene rollout, which encompasses the closed-loop simulation of agent behaviors. While diffusion models have been proven effective in learning realistic and multimodal agent distributions, several challenges remain, including controllability, maintaining realism in closed-loop simulations, and ensuring inference efficiency. To address these issues, we introduce amortized diffusion for simulation. This novel diffusion denoising paradigm amortizes the computational cost of denoising over future simulation steps, significantly reducing the cost per rollout step (16x fewer inference steps) while also mitigating closed-loop errors. We further enhance controllability through the introduction of generalized hard constraints, a simple yet effective inference-time constraint mechanism, as well as language-based constrained scene generation via few-shot prompting of a large language model (LLM). Our investigations into model scaling reveal that increased computational resources significantly improve overall simulation realism. We demonstrate the effectiveness of our approach on the Waymo Open Sim Agents Challenge, achieving top open-loop performance and the best closed-loop performance among diffusion models. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p>
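<p class="is-size-7">A hedged sketch of the amortized-diffusion idea the abstract describes: instead of running a full K-step denoising chain at every simulation step, one denoising update is applied per step to a sliding window of noisy future states, so the chain cost is spread across the rollout. All names and shapes below are illustrative assumptions, not the SceneDiffuser code.</p>
<pre><code class="language-python"># Conceptual amortized rollout: one denoising update per simulation step
# over a window whose head is nearly clean and whose tail is fresh noise.
import torch

def amortized_rollout(denoise_step, steps, horizon=32, state_dim=8):
    window = torch.randn(horizon, state_dim)          # noisy future plan
    noise_level = torch.linspace(0.0, 1.0, horizon)   # head ~ clean
    outputs = []
    for _ in range(steps):
        window = denoise_step(window, noise_level)    # one update, not K
        outputs.append(window[0])                     # emit the clean head
        window = torch.cat([window[1:], torch.randn(1, state_dim)])
    return torch.stack(outputs)
</code></pre>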
<p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to NeurIPS 2024</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 68T07 <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2.6 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.09224">arXiv:2412.09224</a> <span> [<a href="https://arxiv.org/pdf/2412.09224">pdf</a>, <a href="https://arxiv.org/format/2412.09224">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> DASK: Distribution Rehearsing via Adaptive Style Kernel Learning for Exemplar-Free Lifelong Person Re-Identification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+K">Kunlun Xu</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+C">Chenghao Jiang</a>, <a href="/search/cs?searchtype=author&query=Xiong%2C+P">Peixi Xiong</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+Y">Yuxin Peng</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+J">Jiahuan Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2412.09224v3-abstract-full"> Lifelong person re-identification (LReID) is an important but challenging task that suffers from catastrophic forgetting due to significant domain gaps between training steps. Existing LReID approaches typically rely on data replay and knowledge distillation to mitigate this issue. However, data replay methods compromise data privacy by storing historical exemplars, while knowledge distillation methods suffer from limited performance due to the cumulative forgetting of undistilled knowledge. To overcome these challenges, we propose a novel paradigm that models and rehearses the distribution of the old domains to enhance knowledge consolidation during new data learning, providing a strong anti-forgetting capacity without storing any exemplars. Specifically, we introduce an exemplar-free LReID method called Distribution Rehearsing via Adaptive Style Kernel Learning (DASK). DASK includes a Distribution Rehearser Learning (DRL) mechanism that learns to transform arbitrary distribution data into the current data style at each learning step.
To enhance the style transfer capacity of DRL, an Adaptive Kernel Prediction Network (AKPNet) is explored to achieve an instance-specific distribution adjustment. Additionally, we design a Distribution Rehearsing-driven LReID Training (DRRT) module, which rehearses the old distributions based on the new data via the old AKPNet model, achieving effective new-old knowledge accumulation under a joint knowledge consolidation scheme. Experimental results show our DASK outperforms the existing methods by 3.6%-6.8% and 4.5%-6.5% on anti-forgetting and generalization capacity, respectively. Our code is available at https://github.com/zhoujiahuan1991/AAAI2025-LReID-DASK </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">in Proceedings of the 39th AAAI Conference on Artificial Intelligence (AAAI-25)</span> </p>
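<p class="is-size-7">An illustrative sketch of the instance-specific restyling idea behind AKPNet: a kernel-prediction net maps each new-domain image to its own small convolution kernel, which restyles the image toward an old domain so old-style data can be rehearsed without storing exemplars. The shapes, the 3x3 kernel choice, and <code>kernel_net</code> are assumptions, not the authors' code.</p>
<pre><code class="language-python"># Per-instance kernel restyling via grouped convolution (assumed layout).
import torch
import torch.nn.functional as F

def rehearse_old_style(kernel_net, new_batch):
    # kernel_net: (B, C, H, W) -> (B, C * 9) per-instance kernel weights
    B, C, H, W = new_batch.shape
    kernels = kernel_net(new_batch).view(B * C, 1, 3, 3)
    x = new_batch.reshape(1, B * C, H, W)              # one group per map
    restyled = F.conv2d(x, kernels, padding=1, groups=B * C)
    return restyled.view(B, C, H, W)                   # pseudo old-domain data
</code></pre>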
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.09199">arXiv:2412.09199</a> <span> [<a href="https://arxiv.org/pdf/2412.09199">pdf</a>, <a href="https://arxiv.org/format/2412.09199">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> MVC-VPR: Mutual Learning of Viewpoint Classification and Visual Place Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gu%2C+Q">Qiwen Gu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xufei Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+F">Fenglin Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+J">Junqiao Zhao</a>, <a href="/search/cs?searchtype=author&query=Tao%2C+S">Siyue Tao</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+C">Chen Ye</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+T">Tiantian Feng</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+C">Changjun Jiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2412.09199v2-abstract-full"> Visual Place Recognition (VPR) aims to robustly identify locations by leveraging image retrieval based on descriptors encoded from environmental images. However, drastic appearance changes of images captured from different viewpoints at the same location pose incoherent supervision signals for descriptor learning, which severely hinder the performance of VPR. Previous work proposes classifying images based on manually defined rules or ground-truth viewpoint labels, followed by descriptor training based on the classification results. However, not all datasets have ground-truth viewpoint labels, and manually defined rules may be suboptimal, leading to degraded descriptor performance. To address these challenges, we introduce the mutual learning of viewpoint self-classification and VPR. Starting from coarse classification based on geographical coordinates, we progress to finer classification of viewpoints using simple clustering techniques. The dataset is partitioned in an unsupervised manner while simultaneously training a descriptor extractor for place recognition. Experimental results show that this approach almost perfectly partitions the dataset based on viewpoints, thus achieving mutually reinforcing effects. Our method even surpasses state-of-the-art (SOTA) methods that partition datasets using ground-truth labels. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p>
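<p class="is-size-7">A hedged sketch of the mutual-learning loop described above: cluster each place's images into viewpoint groups using the current descriptors, train the descriptor with those assignments as supervision, and repeat. The callbacks and the k-means choice are assumptions for illustration; the paper only says "simple clustering techniques".</p>
<pre><code class="language-python"># Alternate unsupervised viewpoint clustering with descriptor training.
import numpy as np
from sklearn.cluster import KMeans

def mutual_learning(extract, train_one_epoch, images, places, k=4, rounds=5):
    for _ in range(rounds):
        desc = extract(images)                        # (N, D) descriptors
        viewpoints = np.empty(len(images), dtype=int)
        for p in np.unique(places):                   # cluster per place
            idx = np.where(places == p)[0]
            km = KMeans(n_clusters=min(k, len(idx)), n_init=10)
            viewpoints[idx] = km.fit_predict(desc[idx])
        train_one_epoch(images, places, viewpoints)   # refine descriptors
    return viewpoints
</code></pre>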
<p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.08969">arXiv:2412.08969</a> <span> [<a href="https://arxiv.org/pdf/2412.08969">pdf</a>, <a href="https://arxiv.org/format/2412.08969">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Deep Learning Model Security: Threats and Defenses </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tianyang Wang</a>, <a href="/search/cs?searchtype=author&query=Bi%2C+Z">Ziqian Bi</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yichao Zhang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+M">Ming Liu</a>, <a href="/search/cs?searchtype=author&query=Hsieh%2C+W">Weiche Hsieh</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+P">Pohsun Feng</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+L+K+Q">Lawrence K. Q. Yan</a>, <a href="/search/cs?searchtype=author&query=Wen%2C+Y">Yizhu Wen</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+B">Benji Peng</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Junyu Liu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Keyu Chen</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Sen Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+M">Ming Li</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+C">Chuanqi Jiang</a>, <a href="/search/cs?searchtype=author&query=Song%2C+X">Xinyuan Song</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+J">Junjie Yang</a>, <a href="/search/cs?searchtype=author&query=Jing%2C+B">Bowen Jing</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+J">Jintao Ren</a>, <a href="/search/cs?searchtype=author&query=Song%2C+J">Junhao Song</a>, <a href="/search/cs?searchtype=author&query=Tseng%2C+H">Hong-Ming Tseng</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+S">Silin Chen</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yunze Wang</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+C+X">Chia Xin Liang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+J">Jiawei Xu</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+X">Xuanhe Pan</a> , et al. (2 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2412.08969v2-abstract-full"> Deep learning has transformed AI applications but faces critical security challenges, including adversarial attacks, data poisoning, model theft, and privacy leakage. This survey examines these vulnerabilities, detailing their mechanisms and impact on model integrity and confidentiality.
Practical implementations, including adversarial examples, label flipping, and backdoor attacks, are explored alongside defenses such as adversarial training, differential privacy, and federated learning, highlighting their strengths and limitations. Advanced methods like contrastive and self-supervised learning are presented for enhancing robustness. The survey concludes with future directions, emphasizing automated defenses, zero-trust architectures, and the security challenges of large AI models. A balanced approach to performance and security is essential for developing reliable deep learning systems. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p>
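<p class="is-size-7">To make one of the attack classes this survey covers concrete, here is the standard fast gradient sign method (FGSM) for crafting an adversarial example. This is the textbook formulation, not code from the survey.</p>
<pre><code class="language-python"># FGSM: perturb the input in the direction of the loss gradient's sign.
import torch
import torch.nn.functional as F

def fgsm(model, x, y, eps=0.03):
    x = x.clone().detach().requires_grad_(True)
    F.cross_entropy(model(x), y).backward()     # gradient w.r.t. the input
    return (x + eps * x.grad.sign()).clamp(0, 1).detach()
</code></pre>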
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.03617">arXiv:2412.03617</a> <span> [<a href="https://arxiv.org/pdf/2412.03617">pdf</a>, <a href="https://arxiv.org/format/2412.03617">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> End-to-end Triple-domain PET Enhancement: A Hybrid Denoising-and-reconstruction Framework for Reconstructing Standard-dose PET Images from Low-dose PET Sinograms </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jiang%2C+C">Caiwen Jiang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+M">Mianxin Liu</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+K">Kaicong Sun</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+D">Dinggang Shen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2412.03617v1-abstract-full"> As a sensitive functional imaging technique, positron emission tomography (PET) plays a critical role in early disease diagnosis. However, obtaining a high-quality PET image requires injecting a sufficient dose (standard dose) of radionuclides into the body, which inevitably poses radiation hazards to patients. To mitigate these hazards, the reconstruction of standard-dose PET (SPET) from low-dose PET (LPET) is desired. According to imaging theory, the PET reconstruction process involves multiple domains (e.g., the projection domain and the image domain), and a significant portion of the difference between SPET and LPET arises from variations in the noise levels introduced during the sampling of raw data as sinograms. In light of these two facts, we propose an end-to-end TriPle-domain LPET EnhancemenT (TriPLET) framework that leverages the advantages of a hybrid denoising-and-reconstruction process and a triple-domain representation (i.e., sinograms, frequency spectrum maps, and images) to reconstruct SPET images from LPET sinograms. Specifically, TriPLET consists of three sequentially coupled components: 1) a Transformer-assisted denoising network that denoises the input LPET sinograms in the projection domain, 2) a discrete-wavelet-transform-based reconstruction network that further reconstructs SPET from LPET in the wavelet domain, and 3) a pair-based adversarial network that evaluates the reconstructed SPET images in the image domain. Extensive experiments on a real PET dataset demonstrate that our proposed TriPLET reconstructs SPET images with higher similarity to real data and a higher signal-to-noise ratio than state-of-the-art methods. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p>
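<p class="is-size-7">A placeholder composition of the three sequentially coupled stages this abstract names. Each module is an assumed black box (the discrete wavelet transform is taken to live inside <code>reconstructor</code>, and the training-time adversarial critic is omitted); this is a sketch of the data flow, not the authors' architecture.</p>
<pre><code class="language-python"># Projection-domain denoising -> back-projection -> wavelet-domain refinement.
import torch.nn as nn

class TriPLETSketch(nn.Module):
    def __init__(self, denoiser, inverse_radon, reconstructor):
        super().__init__()
        self.denoiser = denoiser            # Transformer over sinograms
        self.inverse_radon = inverse_radon  # fixed back-projection operator
        self.reconstructor = reconstructor  # wavelet-domain refinement net

    def forward(self, lpet_sinogram):
        s = self.denoiser(lpet_sinogram)    # projection domain
        img = self.inverse_radon(s)         # to image domain
        return self.reconstructor(img)      # refined SPET estimate
</code></pre>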
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.00800">arXiv:2412.00800</a> <span> [<a href="https://arxiv.org/pdf/2412.00800">pdf</a>, <a href="https://arxiv.org/format/2412.00800">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> A Comprehensive Guide to Explainable AI: From Classical Models to LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hsieh%2C+W">Weiche Hsieh</a>, <a href="/search/cs?searchtype=author&query=Bi%2C+Z">Ziqian Bi</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+C">Chuanqi Jiang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Junyu Liu</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+B">Benji Peng</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Sen Zhang</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+X">Xuanhe Pan</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+J">Jiawei Xu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jinlang Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+K">Keyu Chen</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+P">Pohsun Feng</a>, <a href="/search/cs?searchtype=author&query=Wen%2C+Y">Yizhu Wen</a>, <a href="/search/cs?searchtype=author&query=Song%2C+X">Xinyuan Song</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+T">Tianyang Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+M">Ming Liu</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+J">Junjie Yang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+M">Ming Li</a>, <a href="/search/cs?searchtype=author&query=Jing%2C+B">Bowen Jing</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+J">Jintao Ren</a>, <a href="/search/cs?searchtype=author&query=Song%2C+J">Junhao Song</a>, <a href="/search/cs?searchtype=author&query=Tseng%2C+H">Hong-Ming Tseng</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yichao Zhang</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+L+K+Q">Lawrence K. Q. Yan</a>, <a href="/search/cs?searchtype=author&query=Niu%2C+Q">Qian Niu</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+S">Silin Chen</a> , et al. (2 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2412.00800v2-abstract-full"> Explainable Artificial Intelligence (XAI) addresses the growing need for transparency and interpretability in AI systems, enabling trust and accountability in decision-making processes. This book offers a comprehensive guide to XAI, bridging foundational concepts with advanced methodologies.
It explores interpretability in traditional models such as Decision Trees, Linear Regression, and Support Vector Machines, alongside the challenges of explaining deep learning architectures like CNNs, RNNs, and Large Language Models (LLMs), including BERT, GPT, and T5. The book presents practical techniques such as SHAP, LIME, Grad-CAM, counterfactual explanations, and causal inference, supported by Python code examples for real-world applications. Case studies illustrate XAI's role in healthcare, finance, and policymaking, demonstrating its impact on fairness and decision support. The book also covers evaluation metrics for explanation quality, an overview of cutting-edge XAI tools and frameworks, and emerging research directions, such as interpretability in federated learning and ethical AI considerations. Designed for a broad audience, this resource equips readers with the theoretical insights and practical skills needed to master XAI. Hands-on examples and additional resources are available at the companion GitHub repository: https://github.com/Echoslayer/XAI_From_Classical_Models_to_LLMs. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p>
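<p class="is-size-7">A small usage sketch for one of the techniques the book covers (SHAP). The dataset and model below are stand-ins chosen for self-containment; this is not an example taken from the book.</p>
<pre><code class="language-python"># Model-agnostic SHAP explanation of a scikit-learn classifier.
import shap
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier

X, y = load_breast_cancer(return_X_y=True, as_frame=True)
model = RandomForestClassifier(n_estimators=100).fit(X, y)

explainer = shap.Explainer(model.predict, X)  # callable + background data
shap_values = explainer(X.iloc[:50])          # explain 50 predictions
shap.plots.bar(shap_values)                   # global feature importance
</code></pre>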
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.18133">arXiv:2411.18133</a> <span> [<a href="https://arxiv.org/pdf/2411.18133">pdf</a>, <a href="https://arxiv.org/format/2411.18133">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Towards Cross-device and Training-free Robotic Grasping in 3D Open World </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhao%2C+W">Weiguang Zhao</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+C">Chenru Jiang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+C">Chengrui Zhang</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+J">Jie Sun</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+Y">Yuyao Yan</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Rui Zhang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+K">Kaizhu Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2411.18133v1-abstract-full"> Robotic grasping in the open world is a critical component of manufacturing and automation processes. While numerous existing approaches depend on 2D segmentation output to facilitate the grasping procedure, accurately determining depth from 2D imagery remains a challenge, often leading to limited performance in complex stacking scenarios. In contrast, techniques utilizing 3D point cloud data inherently capture depth information, enabling adept navigation and manipulation of a diverse range of complex stacking scenes. However, such efforts are considerably hindered by variance in data capture devices and the unstructured nature of the data, which limits their generalizability. Consequently, much research is narrowly concentrated on managing designated objects within specific settings, which confines real-world applicability. This paper presents a novel pipeline capable of executing object grasping tasks in open-world scenarios, even on previously unseen objects, without the necessity for training. Additionally, our pipeline supports the flexible use of different 3D point cloud segmentation models across a variety of scenes.
Leveraging the segmentation results, we propose a training-free binary clustering algorithm that not only improves segmentation precision but can also cluster and localize unseen objects for executing grasping operations. In our experiments, we investigate a range of open-world scenarios, and the outcomes underscore the remarkable robustness and generalizability of our pipeline, consistent across various environments, robots, cameras, and objects. The code will be made available upon acceptance of the paper. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.17189">arXiv:2411.17189</a> <span> [<a href="https://arxiv.org/pdf/2411.17189">pdf</a>, <a href="https://arxiv.org/format/2411.17189">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> PhysMotion: Physics-Grounded Dynamics From a Single Image </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tan%2C+X">Xiyang Tan</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Y">Ying Jiang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xuan Li</a>, <a href="/search/cs?searchtype=author&query=Zong%2C+Z">Zeshun Zong</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+T">Tianyi Xie</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yin Yang</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+C">Chenfanfu Jiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2411.17189v2-abstract-full"> We introduce PhysMotion, a novel framework that leverages principled physics-based simulations to guide intermediate 3D representations generated from a single image and input conditions (e.g., applied force and torque), producing high-quality, physically plausible video generation.
By utilizing continuum mechanics-based simulations as prior knowledge, our approach addresses the limitations of traditional data-driven generative models and results in more consistent, physically plausible motions. Our framework begins by reconstructing a feed-forward 3D Gaussian representation from a single image through geometry optimization. This representation is then time-stepped using a differentiable Material Point Method (MPM) with continuum mechanics-based elastoplasticity models, which provides a strong foundation for realistic dynamics, albeit at a coarse level of detail. To enhance the geometry and appearance and to ensure spatiotemporal consistency, we refine the initial simulation using a text-to-image (T2I) diffusion model with cross-frame attention, resulting in a physically plausible video that retains intricate details comparable to the input image. We conduct comprehensive qualitative and quantitative evaluations to validate the efficacy of our method. Our project page is available at: https://supertan0204.github.io/physmotion_website/. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project Page: https://supertan0204.github.io/physmotion_website/</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.17089">arXiv:2411.17089</a> <span> [<a href="https://arxiv.org/pdf/2411.17089">pdf</a>, <a href="https://arxiv.org/ps/2411.17089">ps</a>, <a href="https://arxiv.org/format/2411.17089">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Performance">cs.PF</span> </div> </div> <p class="title is-5 mathjax"> Efficient LLM Inference with I/O-Aware Partial KV Cache Recomputation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jiang%2C+C">Chaoyi Jiang</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+L">Lei Gao</a>, <a href="/search/cs?searchtype=author&query=Zarch%2C+H+E">Hossein Entezari Zarch</a>, <a href="/search/cs?searchtype=author&query=Annavaram%2C+M">Murali Annavaram</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2411.17089v1-abstract-full"> Inference for Large Language Models (LLMs) is computationally demanding. To reduce the cost of auto-regressive decoding, Key-Value (KV) caching is used to store intermediate activations, enabling GPUs to perform only the incremental computation required for each new token.
This approach significantly lowers the computational overhead of token generation. However, the memory required for KV caching grows rapidly, often exceeding the capacity of GPU memory. A cost-effective alternative is to offload the KV cache to CPU memory, which alleviates GPU memory pressure but shifts the bottleneck to the limited bandwidth of the PCIe connection between the CPU and GPU. Existing methods attempt to address these issues by overlapping GPU computation with I/O or employing CPU-GPU heterogeneous execution, but they are hindered by excessive data movement and dependence on CPU capabilities. In this paper, we introduce an efficient CPU-GPU I/O-aware LLM inference method that avoids transferring the entire KV cache from CPU to GPU by recomputing a partial KV cache from activations while concurrently transferring the remaining KV cache over the PCIe bus. This approach overlaps GPU recomputation with data transfer to minimize idle GPU time and maximize inference performance. Our method is fully automated: a profiler module gathers input characteristics and system hardware information, a scheduler module optimizes the distribution of computation and communication workloads, and a runtime module efficiently executes the derived execution plan. Experimental results show that our method achieves up to 35.8% lower latency and 46.2% higher throughput during decoding compared to state-of-the-art approaches. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p>
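<p class="is-size-7">A hedged sketch of the core overlap idea from this abstract: recompute the head of the KV cache on the GPU from (smaller) activations while the tail streams over PCIe on a separate CUDA stream. The split ratio, tensor layout, and <code>recompute_kv</code> are illustrative assumptions, and <code>kv_cpu</code> is assumed to live in pinned memory so the copy can be asynchronous.</p>
<pre><code class="language-python"># Overlap partial KV recomputation with an asynchronous PCIe transfer.
import torch

def fetch_kv(kv_cpu, act_gpu, recompute_kv, split=0.3):
    n = int(split * kv_cpu.shape[0])
    copy_stream = torch.cuda.Stream()
    with torch.cuda.stream(copy_stream):               # async PCIe copy
        kv_tail = kv_cpu[n:].to("cuda", non_blocking=True)
    kv_head = recompute_kv(act_gpu[:n])                # overlapped recompute
    torch.cuda.current_stream().wait_stream(copy_stream)
    return torch.cat([kv_head, kv_tail], dim=0)
</code></pre>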
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.15593">arXiv:2411.15593</a> <span> [<a href="https://arxiv.org/pdf/2411.15593">pdf</a>, <a href="https://arxiv.org/format/2411.15593">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Medillustrator: Improving Retrospective Learning in Physicians' Continuous Medical Education via Multimodal Diagnostic Data Alignment and Representation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xu%2C+Y">Yuansong Xu</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+J">Jiahe Dong</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+Y">Yijie Fan</a>, <a href="/search/cs?searchtype=author&query=Shao%2C+Y">Yuheng Shao</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+C">Chang Jiang</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+L">Lixia Jin</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+Y">Yuanwu Cao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Q">Quan Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2411.15593v1-abstract-full"> Continuous Medical Education (CME) plays a vital role in physicians' ongoing professional development. Beyond immediate diagnoses, physicians utilize multimodal diagnostic data for retrospective learning, engaging in self-directed analysis and collaborative discussions with peers. However, learning effectively from such data poses challenges for novice physicians, including screening and identifying valuable research cases, achieving fine-grained alignment and representation of multimodal data at the semantic level, and conducting comprehensive contextual analysis aided by reference data. To tackle these challenges, we introduce Medillustrator, a visual analytics system crafted to facilitate novice physicians' retrospective learning. Our structured approach enables novice physicians to explore and review research cases at an overview level and to analyze specific cases with consistent alignment of multimodal and reference data. Furthermore, physicians can record and review analyzed results to facilitate further retrospection. The efficacy of Medillustrator in enhancing physicians' retrospective learning processes is demonstrated through a comprehensive case study and a controlled in-lab between-subject user study.
</span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear in Proceedings of Chinese CHI 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.15277">arXiv:2411.15277</a> <span> [<a href="https://arxiv.org/pdf/2411.15277">pdf</a>, <a href="https://arxiv.org/format/2411.15277">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Foundation Cures Personalization: Recovering Facial Personalized Models' Prompt Consistency </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cai%2C+Y">Yiyang Cai</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Z">Zhengkai Jiang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yulong Liu</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+C">Chunyang Jiang</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+W">Wei Xue</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+W">Wenhan Luo</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+Y">Yike Guo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2411.15277v1-abstract-full"> Facial personalization represents a crucial downstream task in the domain of text-to-image generation. To preserve identity fidelity while ensuring alignment with user-defined prompts, current mainstream frameworks for facial personalization predominantly employ identity embedding mechanisms to associate identity information with textual embeddings. However, our experiments show that identity embeddings compromise the effectiveness of other tokens within the prompt, thereby hindering high prompt consistency, particularly when prompts involve multiple facial attributes.
Moreover, previous works overlook the fact that their corresponding foundation models hold great potential to generate faces well aligned with prompts, and that this potential can be easily leveraged to cure the ill-aligned attributes in personalized models. Building upon these insights, we propose FreeCure, a training-free framework that harnesses the intrinsic knowledge of the foundation models themselves to improve the prompt consistency of personalization models. First, by extracting cross-attention and semantic maps from the denoising process of foundation models, we identify easily localized attributes (e.g., hair and accessories). Second, we enhance multiple attributes in the outputs of personalization models through a novel noise-blending strategy coupled with an inversion-based process. Our approach offers several advantages: it eliminates the need for training; it effectively facilitates the enhancement of a wide array of facial attributes in a non-intrusive manner; and it can be seamlessly integrated into existing popular personalization models. FreeCure has demonstrated significant improvements in prompt consistency across a diverse set of state-of-the-art facial personalization models while maintaining the integrity of original identity fidelity. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.14110">arXiv:2411.14110</a> <span> [<a href="https://arxiv.org/pdf/2411.14110">pdf</a>, <a href="https://arxiv.org/format/2411.14110">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> RAG-Thief: Scalable Extraction of Private Data from Retrieval-Augmented Generation Applications with Agent-based Attacks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jiang%2C+C">Changyue Jiang</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+X">Xudong Pan</a>, <a href="/search/cs?searchtype=author&query=Hong%2C+G">Geng Hong</a>, <a href="/search/cs?searchtype=author&query=Bao%2C+C">Chenfu Bao</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+M">Min Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2411.14110v1-abstract-full"> While large language models (LLMs) have achieved notable success in generative tasks, they still face limitations, such as lacking up-to-date knowledge and producing hallucinations. Retrieval-Augmented Generation (RAG) enhances LLM performance by integrating external knowledge bases, providing additional context which significantly improves accuracy and knowledge coverage.
However, building these external knowledge bases often requires substantial resources and may involve sensitive information. In this paper, we propose an agent-based automated privacy attack called RAG-Thief, which can extract a scalable amount of private data from the private database used in RAG applications. We conduct a systematic study on the privacy risks associated with RAG applications, revealing that the vulnerability of LLMs exposes private knowledge bases to significant privacy risks. Unlike previous manual attacks that rely on traditional prompt injection techniques, RAG-Thief starts with an initial adversarial query and learns from model responses, progressively generating new queries to extract as many chunks from the knowledge base as possible. Experimental results show that RAG-Thief can extract over 70% of the information from the private knowledge bases within customized RAG applications deployed on local machines and real-world platforms, including OpenAI's GPTs and ByteDance's Coze. Our findings highlight the privacy vulnerabilities in current RAG applications and underscore the pressing need for stronger safeguards. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p>
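<p class="is-size-7">A sketch of the agent-style extraction loop this abstract describes: start from one adversarial query, mine each response for newly leaked chunks, and derive follow-up queries from what was recovered. All callbacks here are assumptions standing in for the paper's components; <code>extract_chunks</code> is assumed to return a set of text chunks.</p>
<pre><code class="language-python"># Iterative query-generate-extract loop against a RAG application.
def extraction_loop(ask_rag, extract_chunks, make_followups,
                    seed_query, budget=100):
    recovered, queue = set(), [seed_query]
    for _ in range(budget):
        if not queue:
            break
        response = ask_rag(queue.pop(0))          # query the RAG app
        new_chunks = extract_chunks(response) - recovered
        recovered |= new_chunks
        queue.extend(make_followups(new_chunks))  # pivot toward neighbors
    return recovered
</code></pre>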
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.13952">arXiv:2411.13952</a> <span> [<a href="https://arxiv.org/pdf/2411.13952">pdf</a>, <a href="https://arxiv.org/format/2411.13952">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Learning thin deformable object manipulation with a multi-sensory integrated soft hand </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhao%2C+C">Chao Zhao</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+C">Chunli Jiang</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+L">Lifan Luo</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+S">Shuai Yuan</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Q">Qifeng Chen</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+H">Hongyu Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax" id="2411.13952v1-abstract-full"> Robotic manipulation has made significant advancements, with systems demonstrating high precision and repeatability. However, this remarkable precision often fails to translate into efficient manipulation of thin deformable objects. Current robotic systems lack imprecise dexterity: the ability to perform dexterous manipulation through robust and adaptive behaviors that do not rely on precise control. This paper explores the singulation and grasping of thin, deformable objects. We propose a novel solution that incorporates passive compliance, touch, and proprioception into thin, deformable object manipulation. Our system employs a soft, underactuated hand that provides passive compliance, facilitating adaptive and gentle interactions to dexterously manipulate deformable objects without requiring precise control. The tactile and force/torque sensors on the hand, along with a depth camera, gather the sensory data required for manipulation via the proposed slip module. The manipulation policies are learned directly from raw sensory data via model-free reinforcement learning, bypassing explicit environmental and object modeling. We implement a hierarchical double-loop learning process to enhance learning efficiency by decoupling the action space. Our method was deployed on real-world robots and trained in a self-supervised manner.
arXiv:2411.11909 (https://arxiv.org/abs/2411.11909) [cs.CV]
SymDPO: Boosting In-Context Learning of Large Multimodal Models with Symbol Demonstration Direct Preference Optimization
Authors: Hongrui Jia, Chaoya Jiang, Haiyang Xu, Wei Ye, Mengfan Dong, Ming Yan, Ji Zhang, Fei Huang, Shikun Zhang
Abstract: As language models continue to scale, Large Language Models (LLMs) have exhibited emerging capabilities in In-Context Learning (ICL), enabling them to solve language tasks by prefixing a few in-context demonstrations (ICDs) as context. Inspired by these advancements, researchers have extended these techniques to develop Large Multimodal Models (LMMs) with ICL capabilities. However, existing LMMs face a critical issue: they often fail to effectively leverage the visual context in multimodal demonstrations and instead simply follow textual patterns. This indicates that LMMs do not achieve effective alignment between multimodal demonstrations and model outputs. To address this problem, we propose Symbol Demonstration Direct Preference Optimization (SymDPO). Specifically, SymDPO aims to break the traditional paradigm of constructing multimodal demonstrations by using random symbols to replace text answers within instances. This forces the model to carefully understand the demonstration images and establish a relationship between the images and the symbols to answer questions correctly. We validate the effectiveness of this method on multiple benchmarks, demonstrating that with SymDPO, LMMs can more effectively understand the multimodal context within examples and utilize this knowledge to answer questions better. Code is available at https://github.com/APiaoG/SymDPO.
Submitted 21 November, 2024; v1 submitted 17 November, 2024; originally announced November 2024.
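SymDPO's full pipeline lives in the linked repository; the snippet below is only a minimal sketch of the symbol-substitution idea the abstract describes, and the demonstration schema ({"image", "question", "answer"}) is a made-up assumption.

```python
import random
import string

# Minimal sketch of SymDPO's core data transformation as the abstract
# describes it: the text answer in each multimodal demonstration is replaced
# by a random symbol, so the answer can only be grounded via the image.
# The demonstration schema used here is an assumption, not the paper's.
def symbolize_demonstrations(demos, symbol_len=3, seed=0):
    rng = random.Random(seed)
    mapping, symbolized = {}, []
    for demo in demos:
        answer = demo["answer"]
        if answer not in mapping:
            mapping[answer] = "".join(rng.choices(string.ascii_uppercase, k=symbol_len))
        symbolized.append({**demo, "answer": mapping[answer]})
    return symbolized, mapping  # mapping lets an evaluator decode predictions

demos = [
    {"image": "img_0.png", "question": "What animal is shown?", "answer": "cat"},
    {"image": "img_1.png", "question": "What animal is shown?", "answer": "dog"},
]
print(symbolize_demonstrations(demos))
```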
arXiv:2411.11697 (https://arxiv.org/abs/2411.11697) [cs.LG, stat.ML]
Robust Reinforcement Learning under Diffusion Models for Data with Jumps
Authors: Chenyang Jiang, Donggyu Kim, Alejandra Quintos, Yazhen Wang
Abstract: Reinforcement Learning (RL) has proven effective in solving complex decision-making tasks across various domains, but challenges remain in continuous-time settings, particularly when state dynamics are governed by stochastic differential equations (SDEs) with jump components.
In this paper, we address this challenge by introducing the Mean-Square Bipower Variation Error (MSBVE) algorithm, which enhances robustness and convergence in scenarios involving significant stochastic noise and jumps. We first revisit the Mean-Square TD Error (MSTDE) algorithm, commonly used in continuous-time RL, and highlight its limitations in handling jumps in state dynamics. The proposed MSBVE algorithm minimizes the mean-square quadratic variation error, offering improved performance over MSTDE in environments characterized by SDEs with jumps. Simulations and formal proofs demonstrate that the MSBVE algorithm reliably estimates the value function in complex settings, surpassing MSTDE's performance when faced with jump processes. These findings underscore the importance of alternative error metrics for improving the resilience and effectiveness of RL algorithms in continuous-time frameworks.
Submitted 18 November, 2024; originally announced November 2024.
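The abstract names the estimator without defining it; the sketch below only illustrates the standard bipower-variation statistic it is named after (assuming the usual definition from the jump-robust volatility literature), which estimates the diffusive part of quadratic variation while discounting jumps. It is not the paper's MSBVE objective.

```python
import numpy as np

# Bipower variation vs. realized (quadratic) variation on a simulated path
# with rare jumps: the product of adjacent absolute increments suppresses
# isolated jump terms, so BV stays near the diffusive variance.
rng = np.random.default_rng(0)
n, dt, sigma = 10_000, 1e-4, 1.0
diffusive = sigma * np.sqrt(dt) * rng.standard_normal(n)
jumps = rng.binomial(1, 0.001, n) * rng.normal(0.0, 0.5, n)  # rare, large
dx = diffusive + jumps

realized_var = np.sum(dx**2)                                   # jump-inflated
bipower_var = (np.pi / 2) * np.sum(np.abs(dx[1:]) * np.abs(dx[:-1]))
print(f"true diffusive QV ~ {sigma**2 * n * dt:.3f}")
print(f"realized variance = {realized_var:.3f}  (contaminated by jumps)")
print(f"bipower variation = {bipower_var:.3f}  (jump-robust)")
```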
arXiv:2411.10825 (https://arxiv.org/abs/2411.10825) [cs.CV, cs.GR]
ARM: Appearance Reconstruction Model for Relightable 3D Generation
Authors: Xiang Feng, Chang Yu, Zoubin Bi, Yintong Shang, Feng Gao, Hongzhi Wu, Kun Zhou, Chenfanfu Jiang, Yin Yang
Abstract: Recent image-to-3D reconstruction models have greatly advanced geometry generation, but they still struggle to faithfully generate realistic appearance. To address this, we introduce ARM, a novel method that reconstructs high-quality 3D meshes and realistic appearance from sparse-view images. The core of ARM lies in decoupling geometry from appearance, processing appearance within the UV texture space. Unlike previous methods, ARM improves texture quality by explicitly back-projecting measurements onto the texture map and processing them in a UV-space module with a global receptive field. To resolve ambiguities between material and illumination in input images, ARM introduces a material prior that encodes semantic appearance information, enhancing the robustness of appearance decomposition. Trained on just 8 H100 GPUs, ARM outperforms existing methods both quantitatively and qualitatively.
Submitted 16 November, 2024; originally announced November 2024.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10825v1-abstract-full').style.display = 'none'; document.getElementById('2411.10825v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06106">arXiv:2411.06106</a> <span> [<a href="https://arxiv.org/pdf/2411.06106">pdf</a>, <a href="https://arxiv.org/format/2411.06106">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Personalize to generalize: Towards a universal medical multi-modality generalization through personalization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tan%2C+Z">Zhaorui Tan</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+X">Xi Yang</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+T">Tan Pan</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+T">Tianyi Liu</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+C">Chen Jiang</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+X">Xin Guo</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Q">Qiufeng Wang</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+A">Anh Nguyen</a>, <a href="/search/cs?searchtype=author&query=Qi%2C+Y">Yuan Qi</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+K">Kaizhu Huang</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+Y">Yuan Cheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06106v2-abstract-short" style="display: inline;"> The differences among medical imaging modalities, driven by distinct underlying principles, pose significant challenges for generalization in multi-modal medical tasks. Beyond modality gaps, individual variations, such as differences in organ size and metabolic rate, further impede a model's ability to generalize effectively across both modalities and diverse populations. Despite the importance of… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06106v2-abstract-full').style.display = 'inline'; document.getElementById('2411.06106v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06106v2-abstract-full" style="display: none;"> The differences among medical imaging modalities, driven by distinct underlying principles, pose significant challenges for generalization in multi-modal medical tasks. Beyond modality gaps, individual variations, such as differences in organ size and metabolic rate, further impede a model's ability to generalize effectively across both modalities and diverse populations. Despite the importance of personalization, existing approaches to multi-modal generalization often neglect individual differences, focusing solely on common anatomical features. 
This limitation may result in weakened generalization in various medical tasks. In this paper, we unveil that personalization is critical for multi-modal generalization. Specifically, we propose an approach to achieve personalized generalization through approximating the underlying personalized invariant representation ${X}_h$ across various modalities by leveraging individual-level constraints and a learnable biological prior. We validate the feasibility and benefits of learning a personalized ${X}_h$, showing that this representation is highly generalizable and transferable across various multi-modal medical tasks. Extensive experimental results consistently show that the additionally incorporated personalization significantly improves performance and generalization across diverse scenarios, confirming its effectiveness.
Submitted 12 November, 2024; v1 submitted 9 November, 2024; originally announced November 2024.

arXiv:2411.05815 (https://arxiv.org/abs/2411.05815) [q-fin.ST, cs.LG]
DOI: https://doi.org/10.1007/s11704-024-40474-y
Graph Neural Networks for Financial Fraud Detection: A Review
Authors: Dawei Cheng, Yao Zou, Sheng Xiang, Changjun Jiang
Abstract: The landscape of financial transactions has grown increasingly complex due to the expansion of global economic integration and advancements in information technology. This complexity poses greater challenges in detecting and managing financial fraud.
This review explores the role of Graph Neural Networks (GNNs) in addressing these challenges by proposing a unified framework that categorizes existing GNN methodologies applied to financial fraud detection. Specifically, by examining a series of detailed research questions, this review delves into the suitability of GNNs for financial fraud detection, their deployment in real-world scenarios, and the design considerations that enhance their effectiveness. This review reveals that GNNs are exceptionally adept at capturing complex relational patterns and dynamics within financial networks, significantly outperforming traditional fraud detection methods. Unlike previous surveys that often overlook the specific potentials of GNNs or address them only superficially, our review provides a comprehensive, structured analysis, distinctly focusing on the multifaceted applications and deployments of GNNs in financial fraud detection. This review not only highlights the potential of GNNs to improve fraud detection mechanisms but also identifies current gaps and outlines future research directions to enhance their deployment in financial systems. Through a structured review of over 100 studies, this review paper contributes to the understanding of GNN applications in financial fraud detection, offering insights into their adaptability and potential integration strategies.
Submitted 16 November, 2024; v1 submitted 31 October, 2024; originally announced November 2024.
Comments: 17 pages, 2 figures
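As a concrete anchor for the family of methods this review categorizes, here is a toy sketch of one GCN-style propagation step over a made-up transaction graph; the graph, features, and weights are all invented for illustration, and no specific method from the review is reproduced.

```python
import numpy as np

# Toy illustration of the neighborhood aggregation fraud-detection GNNs
# build on: account features are smoothed over the transaction graph before
# scoring. All data and weights below are made up.
A = np.array([[0, 1, 1, 0],    # adjacency of a 4-account transaction graph
              [1, 0, 1, 0],
              [1, 1, 0, 1],
              [0, 0, 1, 0]], dtype=float)
X = np.array([[0.1, 0.9],      # per-account features, e.g. volume, velocity
              [0.2, 0.8],
              [0.9, 0.7],
              [0.8, 0.1]])

A_hat = A + np.eye(4)                       # add self-loops
d_inv_sqrt = 1.0 / np.sqrt(A_hat.sum(1))
A_norm = d_inv_sqrt[:, None] * A_hat * d_inv_sqrt[None, :]  # D^-1/2 A^ D^-1/2

W = np.array([[0.5], [-0.7]])               # untrained projection to a score
fraud_score = 1 / (1 + np.exp(-(A_norm @ X @ W)))  # one GCN layer + sigmoid
print(fraud_score.ravel())
```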
arXiv:2411.02707 (https://arxiv.org/abs/2411.02707) [math.OA, cs.IT]
Phase Group Category of Bimodule Quantum Channels
Authors: Linzhe Huang, Chunlan Jiang, Zhengwei Liu, Jinsong Wu
Abstract: In this paper, we study quantum channels on a von Neumann algebra $\mathcal{M}$ preserving a von Neumann subalgebra $\mathcal{N}$, namely $\mathcal{N}$-$\mathcal{N}$-bimodule unital completely positive maps. By introducing the relative irreducibility of a bimodule quantum channel, we show that its eigenvalues with modulus 1 form a finite cyclic group, called its phase group. Moreover, the corresponding eigenspaces are invertible $\mathcal{N}$-$\mathcal{N}$-bimodules, which encode a categorification of the phase group. When $\mathcal{N}\subset \mathcal{M}$ is a finite-index irreducible subfactor of type II$_1$, we prove that any bimodule quantum channel is relatively irreducible for the intermediate subfactor of its fixed points. In addition, we reformulate and prove these results intrinsically in subfactor planar algebras, without referring to the subfactor, using the methods of quantum Fourier analysis.
Submitted 4 November, 2024; originally announced November 2024.
Comments: 27 pages
MSC Class: 46L37; 43A30
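The theorem lives in the von Neumann algebra / subfactor setting; as a loose finite-dimensional illustration only (not the paper's bimodule setting), one can check numerically that the modulus-1 eigenvalues of a simple channel built from a cyclic permutation form a cyclic group of phases.

```python
import numpy as np

# Toy check: for the channel E(rho) = P rho P^T with P a cyclic permutation,
# the modulus-1 eigenvalues of the superoperator kron(P, P) are exactly the
# n-th roots of unity, i.e. a finite cyclic group of phases (here Z/4Z).
n = 4
P = np.roll(np.eye(n), 1, axis=0)          # cyclic shift permutation matrix
superop = np.kron(P, P)                    # action on vectorized matrices
eigs = np.linalg.eigvals(superop)
phases = sorted({round(np.angle(z) / (2 * np.pi / n)) % n
                 for z in eigs if abs(abs(z) - 1) < 1e-9})
print(phases)   # [0, 1, 2, 3] -> phases exp(2*pi*i*k/4)
```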
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02707v1-abstract-full').style.display = 'none'; document.getElementById('2411.02707v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">27pages</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 46L37; 43A30 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.00331">arXiv:2411.00331</a> <span> [<a href="https://arxiv.org/pdf/2411.00331">pdf</a>, <a href="https://arxiv.org/format/2411.00331">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Beyond Utility: Evaluating LLM as Recommender </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jiang%2C+C">Chumeng Jiang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jiayin Wang</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+W">Weizhi Ma</a>, <a href="/search/cs?searchtype=author&query=Clarke%2C+C+L+A">Charles L. A. Clarke</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shuai Wang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+C">Chuhan Wu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+M">Min Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.00331v1-abstract-short" style="display: inline;"> With the rapid development of Large Language Models (LLMs), recent studies employed LLMs as recommenders to provide personalized information services for distinct users. Despite efforts to improve the accuracy of LLM-based recommendation models, relatively little attention is paid to beyond-utility dimensions. Moreover, there are unique evaluation aspects of LLM-based recommendation models, which… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.00331v1-abstract-full').style.display = 'inline'; document.getElementById('2411.00331v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.00331v1-abstract-full" style="display: none;"> With the rapid development of Large Language Models (LLMs), recent studies employed LLMs as recommenders to provide personalized information services for distinct users. Despite efforts to improve the accuracy of LLM-based recommendation models, relatively little attention is paid to beyond-utility dimensions. Moreover, there are unique evaluation aspects of LLM-based recommendation models, which have been largely ignored. To bridge this gap, we explore four new evaluation dimensions and propose a multidimensional evaluation framework. The new evaluation dimensions include: 1) history length sensitivity, 2) candidate position bias, 3) generation-involved performance, and 4) hallucinations. 
All four dimensions have the potential to impact performance, but are largely unnecessary for consideration in traditional systems. Using this multidimensional evaluation framework, along with traditional aspects, we evaluate the performance of seven LLM-based recommenders, with three prompting strategies, comparing them with six traditional models on both ranking and re-ranking tasks on four datasets. We find that LLMs excel at handling tasks with prior knowledge and shorter input histories in the ranking setting, and perform better in the re-ranking setting, beating traditional models across multiple dimensions. However, LLMs exhibit substantial candidate position bias issues, and some models hallucinate non-existent items much more often than others. We intend our evaluation framework and observations to benefit future research on the use of LLMs as recommenders. The code and data are available at https://github.com/JiangDeccc/EvaLLMasRecommender.
Submitted 31 October, 2024; originally announced November 2024.
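The paper's exact metric definitions are in the linked repository; below is a minimal sketch of just one of the four dimensions, a hallucination rate counted as recommended items absent from the catalog, with made-up item names.

```python
# Minimal sketch of one evaluation dimension: a hallucination rate, counted
# as the fraction of recommended items that do not exist in the catalog.
# The paper's actual metric definitions live in its repository.
def hallucination_rate(recommendation_lists, catalog):
    catalog = set(catalog)
    recs = [item for user_recs in recommendation_lists for item in user_recs]
    return sum(item not in catalog for item in recs) / max(len(recs), 1)

catalog = ["item_a", "item_b", "item_c"]
llm_output = [["item_a", "item_x"], ["item_b"]]   # "item_x" is hallucinated
print(hallucination_rate(llm_output, catalog))     # 0.333...
```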
arXiv:2410.22909 (https://arxiv.org/abs/2410.22909) [cs.CV]
UniRiT: Towards Few-Shot Non-Rigid Point Cloud Registration
Authors: Geng Li, Haozhi Cao, Mingyang Liu, Chenxi Jiang, Jianfei Yang
Abstract: Non-rigid point cloud registration is a critical challenge in 3D scene understanding, particularly in surgical navigation. Although existing methods achieve excellent performance when trained on large-scale, high-quality datasets, these datasets are prohibitively expensive to collect and annotate, e.g., organ data in authentic medical scenarios. With insufficient training samples and data noise, existing methods degrade significantly, since non-rigid patterns are more flexible and complicated than rigid ones and the distributions across samples are more distinct, making representation learning from few data harder. In this work, we aim to deal with this challenging few-shot non-rigid point cloud registration problem. Based on the observation that complex non-rigid transformation patterns can be decomposed into rigid and small non-rigid transformations, we propose a novel and effective framework, UniRiT. UniRiT adopts a two-step registration strategy that first aligns the centroids of the source and target point clouds and then refines the registration with non-rigid transformations, thereby significantly reducing the problem complexity. To validate the performance of UniRiT on real-world datasets, we introduce a new dataset, MedMatch3D, which consists of real human organs and exhibits high variability in sample distribution. We further establish a new challenging benchmark for few-shot non-rigid registration. Extensive empirical results demonstrate that UniRiT achieves state-of-the-art performance on MedMatch3D, improving on the existing best approach by 94.22%.
Submitted 30 October, 2024; originally announced October 2024.
Comments: 21 pages, 14 figures, under review
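The abstract states UniRiT's first step exactly: align the centroids of source and target clouds before non-rigid refinement. The sketch below implements that coarse step; the learned refinement is left as a hypothetical placeholder (`refine_nonrigid`), since it is not specified here.

```python
import numpy as np

# Step 1 of the two-step strategy from the abstract: centroid alignment.
# `refine_nonrigid` is a hypothetical placeholder for the learned step 2.
def align_centroids(source, target):
    """Translate `source` so its centroid coincides with `target`'s."""
    offset = target.mean(axis=0) - source.mean(axis=0)
    return source + offset, offset

rng = np.random.default_rng(0)
source = rng.normal(size=(100, 3))
target = source + np.array([5.0, -2.0, 1.0])      # pure translation for demo
coarse, offset = align_centroids(source, target)
print(offset)                                      # ~[5, -2, 1]
# A learned non-rigid model would then warp `coarse` onto `target`:
# registered = refine_nonrigid(coarse, target)
```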
arXiv:2410.13311 (https://arxiv.org/abs/2410.13311) [cs.CV]
Enhancing Dataset Distillation via Label Inconsistency Elimination and Learning Pattern Refinement
Authors: Chuhao Zhou, Chenxi Jiang, Yi Xie, Haozhi Cao, Jianfei Yang
Abstract: Dataset Distillation (DD) seeks to create a condensed dataset that, when used to train a model, enables the model to achieve performance similar to that of a model trained on the entire original dataset. It relieves the model training from processing massive data and thus reduces the computation resources, storage, and time costs. This paper illustrates our solution that ranks 1st in the ECCV-2024 Data Distillation Challenge (track 1). Our solution, Modified Difficulty-Aligned Trajectory Matching (M-DATM), introduces two key modifications to the original state-of-the-art method DATM: (1) the soft labels learned by DATM do not achieve one-to-one correspondence with the counterparts generated by the official evaluation script, so we remove the soft-label technique to alleviate this inconsistency; (2) since the removal of soft labels makes it harder for the synthetic dataset to learn late trajectory information, particularly on Tiny ImageNet, we reduce the matching range, allowing the synthetic data to concentrate more on the easier patterns. In the final evaluation, our M-DATM achieved accuracies of 0.4061 and 0.1831 on the CIFAR-100 and Tiny ImageNet datasets, ranking 1st in the Fixed Images Per Class (IPC) Track.
Submitted 17 October, 2024; originally announced October 2024.
Comments: ECCV 2024 Dataset Distillation Challenge
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.13311v1-abstract-full').style.display = 'none'; document.getElementById('2410.13311v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ECCV 2024 Dataset Distillation Challenge</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.13213">arXiv:2410.13213</a> <span> [<a href="https://arxiv.org/pdf/2410.13213">pdf</a>, <a href="https://arxiv.org/format/2410.13213">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> LLMOPT: Learning to Define and Solve General Optimization Problems from Scratch </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jiang%2C+C">Caigao Jiang</a>, <a href="/search/cs?searchtype=author&query=Shu%2C+X">Xiang Shu</a>, <a href="/search/cs?searchtype=author&query=Qian%2C+H">Hong Qian</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+X">Xingyu Lu</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+J">Jun Zhou</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+A">Aimin Zhou</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+Y">Yang Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.13213v1-abstract-short" style="display: inline;"> Optimization problems are prevalent across various scenarios. Formulating and then solving optimization problems described by natural language often requires highly specialized human expertise, which could block the widespread application of optimization-based decision making. To make problem formulating and solving automated, leveraging large language models (LLMs) has emerged as a potential way.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.13213v1-abstract-full').style.display = 'inline'; document.getElementById('2410.13213v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.13213v1-abstract-full" style="display: none;"> Optimization problems are prevalent across various scenarios. Formulating and then solving optimization problems described by natural language often requires highly specialized human expertise, which could block the widespread application of optimization-based decision making. To make problem formulating and solving automated, leveraging large language models (LLMs) has emerged as a potential way. However, this kind of way suffers from the issue of optimization generalization. Namely, the accuracy of most current LLM-based methods and the generality of optimization problem types that they can model are still limited. 
In this paper, we propose a unified learning-based framework called LLMOPT to boost optimization generalization. Starting from natural-language descriptions of optimization problems and a pre-trained LLM, LLMOPT constructs the introduced five-element formulation as a universal model for learning to define diverse optimization problem types. LLMOPT then employs multi-instruction tuning to enhance both problem formalization and solver-code generation accuracy and generality. To prevent hallucinations in LLMs, such as sacrificing solving accuracy to avoid execution errors, a model alignment and self-correction mechanism is adopted in LLMOPT. We evaluate the optimization generalization ability of LLMOPT and compared methods across six real-world datasets covering roughly 20 fields such as health, environment, energy, and manufacturing. Extensive experimental results show that LLMOPT is able to model various optimization problem types such as linear/nonlinear programming, mixed-integer programming, and combinatorial optimization, and achieves a notable 11.08% average improvement in solving accuracy compared with state-of-the-art methods. The code is available at https://github.com/caigaojiang/LLMOPT.
Submitted 17 October, 2024; originally announced October 2024.
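The abstract invokes a "five-element formulation" without listing the elements here. The sketch below assumes the conventional decomposition of an optimization model into sets, parameters, variables, objective, and constraints; the paper's actual schema may differ, and the repository is the authoritative source.

```python
from dataclasses import dataclass, field

# Hypothetical container for a five-element optimization formulation.
# The five fields are an assumption (conventional modeling decomposition),
# not necessarily LLMOPT's own schema.
@dataclass
class FiveElementFormulation:
    sets: dict[str, list] = field(default_factory=dict)
    parameters: dict[str, float] = field(default_factory=dict)
    variables: dict[str, str] = field(default_factory=dict)   # name -> domain
    objective: str = ""
    constraints: list[str] = field(default_factory=list)

problem = FiveElementFormulation(
    sets={"Products": ["p1", "p2"]},
    parameters={"profit_p1": 3.0, "profit_p2": 5.0, "capacity": 8.0},
    variables={"x_p1": "nonnegative real", "x_p2": "nonnegative real"},
    objective="maximize 3*x_p1 + 5*x_p2",
    constraints=["x_p1 + 2*x_p2 <= 8"],
)
print(problem.objective)
```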
style="display: inline;"> A primary challenge for visual-based Reinforcement Learning (RL) is to generalize effectively across unseen environments. Although previous studies have explored different auxiliary tasks to enhance generalization, few adopt image reconstruction due to concerns about exacerbating overfitting to task-irrelevant features during training. Perceiving the pre-eminence of image reconstruction in represe… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10834v1-abstract-full').style.display = 'inline'; document.getElementById('2410.10834v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.10834v1-abstract-full" style="display: none;"> A primary challenge for visual-based Reinforcement Learning (RL) is to generalize effectively across unseen environments. Although previous studies have explored different auxiliary tasks to enhance generalization, few adopt image reconstruction due to concerns about exacerbating overfitting to task-irrelevant features during training. Perceiving the pre-eminence of image reconstruction in representation learning, we propose SMG (Separated Models for Generalization), a novel approach that exploits image reconstruction for generalization. SMG introduces two model branches to extract task-relevant and task-irrelevant representations separately from visual observations via cooperatively reconstruction. Built upon this architecture, we further emphasize the importance of task-relevant features for generalization. Specifically, SMG incorporates two additional consistency losses to guide the agent's focus toward task-relevant areas across different scenarios, thereby achieving free from overfitting. Extensive experiments in DMC demonstrate the SOTA performance of SMG in generalization, particularly excelling in video-background settings. Evaluations on robotic manipulation tasks further confirm the robustness of SMG in real-world applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.10834v1-abstract-full').style.display = 'none'; document.getElementById('2410.10834v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
arXiv:2410.09570 (https://arxiv.org/abs/2410.09570) [cs.LG]
GETS: Ensemble Temperature Scaling for Calibration in Graph Neural Networks
Authors: Dingyi Zhuang, Chonghe Jiang, Yunhan Zheng, Shenhao Wang, Jinhua Zhao
Abstract: Graph Neural Networks deliver strong classification results but often suffer from poor calibration performance, leading to overconfidence or underconfidence. This is particularly problematic in high-stakes applications where accurate uncertainty estimates are essential. Existing post-hoc methods, such as temperature scaling, fail to effectively utilize graph structures, while current GNN calibration methods often overlook the potential of jointly leveraging diverse input information and model ensembles. In this paper, we propose Graph Ensemble Temperature Scaling (GETS), a novel calibration framework that combines input and model ensemble strategies within a Graph Mixture of Experts architecture, outperforming state-of-the-art calibration techniques and reducing expected calibration error by 25 percent across 10 GNN benchmark datasets. Additionally, GETS is computationally efficient, scalable, and capable of selecting effective input combinations for improved calibration performance.
Submitted 12 October, 2024; originally announced October 2024.
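GETS itself learns ensemble, graph-aware temperatures; the sketch below shows only the classical scalar temperature scaling it generalizes, fit by grid search over the negative log-likelihood on synthetic logits. The data are made up for illustration.

```python
import numpy as np

# Classical post-hoc temperature scaling: rescale logits by a single T chosen
# to minimize held-out NLL. GETS's per-node, ensemble-of-experts temperatures
# are not reproduced here.
def softmax(z, T):
    z = z / T
    z = z - z.max(axis=1, keepdims=True)
    e = np.exp(z)
    return e / e.sum(axis=1, keepdims=True)

def fit_temperature(logits, labels, grid=np.linspace(0.5, 5.0, 91)):
    def nll(T):
        p = softmax(logits, T)[np.arange(len(labels)), labels]
        return -np.mean(np.log(p + 1e-12))
    return min(grid, key=nll)

rng = np.random.default_rng(0)
labels = rng.integers(0, 3, size=500)
logits = 4.0 * np.eye(3)[labels] + rng.normal(size=(500, 3))  # synthetic
print(fit_temperature(logits, labels))   # fitted scalar temperature
```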
target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>